#
diff --git a/news/index.html b/news/index.html
index 5c66f18ad0a..06c8a9f2683 100644
--- a/news/index.html
+++ b/news/index.html
@@ -3,7 +3,7 @@
-
+
@@ -33,11 +33,11 @@
-
+
-
+
-
+
diff --git a/resources/js/config.js b/resources/js/config.js
index a5f73acc4fb..eb62723720c 100644
--- a/resources/js/config.js
+++ b/resources/js/config.js
@@ -1 +1 @@
-var __DOCS_CONFIG__ = {"id":"196d6NgPeUOowdRcAoB7UKXHbw/aXoxcTs0","key":"m0pCI5a9x1ylcGWYb6laj0miwYOWRzLXpZZK5jN5HqM.JIx+ejbxPTFC92RONDh4QsXTG7fUyEYz6DogIdRubRSUHuWvxGkqCN9c3Z9gwIqusE8sra3vkAdcahBulnYXKg.8319","base":"/","host":"docs.cbioportal.org","version":"1.0.0","useRelativePaths":true,"documentName":"index.html","appendDocumentName":false,"trailingSlash":true,"preloadSearch":false,"cacheBustingToken":"3.5.0.752876107743","cacheBustingStrategy":"query","sidebarFilterPlaceholder":"Filter","toolbarFilterPlaceholder":"Filter","showSidebarFilter":true,"filterNotFoundMsg":"No member names found containing the query \"{query}\"","maxHistoryItems":15,"homeIcon":"","access":[{"value":"public","label":"Public"},{"value":"protected","label":"Protected"}],"toolbarLinks":[{"id":"fields","label":"Fields"},{"id":"properties","label":"Properties"},{"id":"methods","label":"Methods"},{"id":"events","label":"Events"}],"sidebar":[{"n":"/","l":"Home"},{"n":"/about-us","l":"About Us"},{"n":"/rfc-list","l":"List of RFCs"},{"n":"/user-guide","l":"User Guide","i":[{"n":"/user-guide/new-users","l":"New Users"},{"n":"/user-guide/faq","l":"Frequently Asked Questions"},{"n":"/user-guide/overview","l":"Overview of Resources"},{"n":"/user-guide/by-page","l":"Page Specific Resources"},{"n":"/user-guide/oql","l":"OQL"},{"n":"/news","l":"News"},{"n":"/news-genie","l":"Genie News"}]},{"n":"/web-api-and-clients","l":"Web API and API Clients"},{"n":"/deployment","l":"Deployment","i":[{"n":"/architecture-overview","l":"Architecture overview"},{"n":"/hardware-requirements","l":"Hardware Requirements"},{"n":"/deployment/docker","l":"Deploy with Docker (Recommended)","i":[{"n":"/deployment/docker/import_data","l":"Import data with Docker"},{"n":"/deployment/docker/example_commands","l":"Example Commands"},{"n":"/deployment/docker/using-keycloak","l":"Authenticating and Authorizing Users using Keycloak in Docker"}]},{"n":"/deploy-without-docker","l":"Deploy without 
Docker","c":false,"i":[{"n":"/deployment/deploy-without-docker/software-requirements","l":"Software Requirements"},{"n":"/deployment/deploy-without-docker/pre-build-steps","l":"Pre-Build Steps"},{"n":"/deployment/deploy-without-docker/build-from-source","l":"Building from Source"},{"n":"/deployment/deploy-without-docker/import-the-seed-database","l":"Importing the Seed Database"},{"n":"/deployment/deploy-without-docker/deploying","l":"Deploying the Web Application"},{"n":"/deployment/deploy-without-docker/load-sample-cancer-study","l":"Loading a Sample Study"}]},{"n":"/authorization-and-authentication","l":"Authorization and Authentication","c":false,"i":[{"n":"/deployment/authorization-and-authentication/user-authorization","l":"User Authorization"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-saml","l":"Authenticating Users via SAML"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-ldap","l":"Authenticating Users via LDAP"},{"n":"/deployment/authorization-and-authentication/authenticating-and-authorizing-users-via-keycloak","l":"Authenticating and Authorizing Users via Keycloak"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-tokens","l":"Authenticating Users via Tokens"}]},{"n":"/customization","l":"Customization","c":false,"i":[{"n":"/deployment/customization/customizing-your-instance-of-cbioportal","l":"Customizing your cBioPortal Instance via portal.properties"},{"n":"/deployment/customization/portal.properties-reference","l":"More portal.properties Settings"},{"n":"/deployment/customization/caching","l":"Configuring Caching Behavior"},{"n":"/deployment/customization/studyview","l":"How does the study view organize the charts"},{"n":"/deployment/customization/studyview","l":"Utilize priority setting from database to visualize charts"}]},{"n":"/integration-with-other-webservices","l":"Integration with Other 
Webservices","c":false,"i":[{"n":"/deployment/integration-with-other-webservices/oncokb-data-access","l":"OncoKB Data Access"}]},{"n":"/data-loading","l":"Data Loading Overview","i":[{"n":"/downloads","l":"Downloads"},{"n":"/using-the-dataset-validator","l":"Using the Dataset Validator"},{"n":"/using-the-metaimport-script","l":"Using the metaImport Script"},{"n":"/file-formats","l":"File Formats","i":[{"n":"/z-score-normalization-script","l":"Z-Score Normalization"}]},{"n":"/study-curation-guide","l":"Study Curation Guide"},{"n":"/data-loading-maintaining-studies","l":"Maintaining Studies"},{"n":"/data-loading-for-developers","l":"For Developers"},{"n":"/data-loading-tips-and-best-practices","l":"Tips and Best Practices"},{"n":"/mutation-data-transcript-annotation","l":"Mutation Data Transcript Annotation"},{"n":"/import-oncokb-annotations-as-custom-driver-annotations","l":"Import OncoKB annotations as custom driver annotations"},{"n":"/import-gene-sets","l":"Importing Gene Sets for Gene Set Scoring Data"},{"n":"/import-gene-panels","l":"Importing Gene Panels"},{"n":"/deployment/customization/studyview","l":"Study View Customization with Priority Data"}]},{"n":"/cbioportal-maintenance","l":"cBioPortal Maintenance","c":false,"i":[{"n":"/updating-your-cbioportal-installation","l":"Updating your cBioPortal Database Scheme"},{"n":"/updating-gene-and-gene_alias-tables","l":"Update genes and gene aliases"},{"n":"/migration-guide","l":"Migration Guide"},{"n":"/msk-maintenance","l":"MSK Maintenance"}]}]},{"n":"/development","l":"Development","i":[{"n":"/development/feature-development-guide","l":"Feature Development Guide"},{"n":"/development/cbioportal-er-diagram","l":"cBioPortal Entity-relationship Diagram"},{"n":"/development/database-versioning","l":"cBioPortal Database Versioning"},{"n":"/development/build-different-frontend","l":"Build cBioPortal with a different frontend version"},{"n":"/development/session-service-working","l":"Session 
Service"},{"n":"/development/manual-test-cases","l":"Manual test cases"},{"n":"/development/release-procedure","l":"Release Procedure"},{"n":"/development/deployment-procedure","l":"Deployment Procedure"},{"n":"/development/documentation-site","l":"This documentation site"}]}],"search":{"mode":0,"minChars":2,"maxResults":20,"placeholder":"Search","hotkeys":["k"],"noResultsFoundMsg":"Sorry, no results found.","recognizeLanguages":true,"languages":[0],"preload":false},"resources":{"History_Title_Label":"History","History_ClearLink_Label":"Clear","History_NoHistory_Label":"No history items","API_AccessFilter_Label":"Access","API_ParameterSection_Label":"PARAMETERS","API_SignatureSection_Label":"SIGNATURE","API_CopyHint_Label":"Copy","API_CopyNameHint_Label":"Copy name","API_CopyLinkHint_Label":"Copy link","API_CopiedAckHint_Label":"Copied!","API_MoreOverloads_Label":"more","API_MoreDropdownItems_Label":"More","API_OptionalParameter_Label":"optional","API_DefaultParameterValue_Label":"Default value","API_InheritedFilter_Label":"Inherited","Search_Input_Placeholder":"Search","Toc_Contents_Label":"Contents","Toc_RelatedClasses_Label":"Related Classes","History_JustNowTime_Label":"just now","History_AgoTime_Label":"ago","History_YearTime_Label":"y","History_MonthTime_Label":"mo","History_DayTime_Label":"d","History_HourTime_Label":"h","History_MinuteTime_Label":"m","History_SecondTime_Label":"s"}};
+var __DOCS_CONFIG__ = {"id":"RCMqriZD/a5jaS4TBXcJXbDq31964yFWS7c","key":"mrcujrD9PY13GdnJB7wCbPGaXcVqPaxrvgiNfeJd3tk.sFJl3L+00KdXGJjMGJjOFVzA22kaL4NaSLBhbXeC3siyGBI9aCbnKIxKL8tZad7+FQvZ2IjfXErLRHl5WFdlkA.8284","base":"/","host":"docs.cbioportal.org","version":"1.0.0","useRelativePaths":true,"documentName":"index.html","appendDocumentName":false,"trailingSlash":true,"preloadSearch":false,"cacheBustingToken":"3.5.0.752877399573","cacheBustingStrategy":"query","sidebarFilterPlaceholder":"Filter","toolbarFilterPlaceholder":"Filter","showSidebarFilter":true,"filterNotFoundMsg":"No member names found containing the query \"{query}\"","maxHistoryItems":15,"homeIcon":"","access":[{"value":"public","label":"Public"},{"value":"protected","label":"Protected"}],"toolbarLinks":[{"id":"fields","label":"Fields"},{"id":"properties","label":"Properties"},{"id":"methods","label":"Methods"},{"id":"events","label":"Events"}],"sidebar":[{"n":"/","l":"Home"},{"n":"/about-us","l":"About Us"},{"n":"/rfc-list","l":"List of RFCs"},{"n":"/user-guide","l":"User Guide","i":[{"n":"/user-guide/new-users","l":"New Users"},{"n":"/user-guide/faq","l":"Frequently Asked Questions"},{"n":"/user-guide/overview","l":"Overview of Resources"},{"n":"/user-guide/by-page","l":"Page Specific Resources"},{"n":"/user-guide/oql","l":"OQL"},{"n":"/news","l":"News"},{"n":"/news-genie","l":"Genie News"}]},{"n":"/web-api-and-clients","l":"Web API and API Clients"},{"n":"/deployment","l":"Deployment","i":[{"n":"/architecture-overview","l":"Architecture overview"},{"n":"/hardware-requirements","l":"Hardware Requirements"},{"n":"/deployment/docker","l":"Deploy with Docker (Recommended)","i":[{"n":"/deployment/docker/import_data","l":"Import data with Docker"},{"n":"/deployment/docker/example_commands","l":"Example Commands"},{"n":"/deployment/docker/using-keycloak","l":"Authenticating and Authorizing Users using Keycloak in Docker"}]},{"n":"/deploy-without-docker","l":"Deploy without 
Docker","c":false,"i":[{"n":"/deployment/deploy-without-docker/software-requirements","l":"Software Requirements"},{"n":"/deployment/deploy-without-docker/pre-build-steps","l":"Pre-Build Steps"},{"n":"/deployment/deploy-without-docker/build-from-source","l":"Building from Source"},{"n":"/deployment/deploy-without-docker/import-the-seed-database","l":"Importing the Seed Database"},{"n":"/deployment/deploy-without-docker/deploying","l":"Deploying the Web Application"},{"n":"/deployment/deploy-without-docker/load-sample-cancer-study","l":"Loading a Sample Study"}]},{"n":"/authorization-and-authentication","l":"Authorization and Authentication","c":false,"i":[{"n":"/deployment/authorization-and-authentication/user-authorization","l":"User Authorization"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-saml","l":"Authenticating Users via SAML"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-ldap","l":"Authenticating Users via LDAP"},{"n":"/deployment/authorization-and-authentication/authenticating-and-authorizing-users-via-keycloak","l":"Authenticating and Authorizing Users via Keycloak"},{"n":"/deployment/authorization-and-authentication/authenticating-users-via-tokens","l":"Authenticating Users via Tokens"}]},{"n":"/customization","l":"Customization","c":false,"i":[{"n":"/deployment/customization/customizing-your-instance-of-cbioportal","l":"Customizing your cBioPortal Instance via portal.properties"},{"n":"/deployment/customization/portal.properties-reference","l":"More portal.properties Settings"},{"n":"/deployment/customization/caching","l":"Configuring Caching Behavior"},{"n":"/deployment/customization/studyview","l":"How does the study view organize the charts"},{"n":"/deployment/customization/studyview","l":"Utilize priority setting from database to visualize charts"}]},{"n":"/integration-with-other-webservices","l":"Integration with Other 
Webservices","c":false,"i":[{"n":"/deployment/integration-with-other-webservices/oncokb-data-access","l":"OncoKB Data Access"}]},{"n":"/data-loading","l":"Data Loading Overview","i":[{"n":"/downloads","l":"Downloads"},{"n":"/using-the-dataset-validator","l":"Using the Dataset Validator"},{"n":"/using-the-metaimport-script","l":"Using the metaImport Script"},{"n":"/file-formats","l":"File Formats","i":[{"n":"/z-score-normalization-script","l":"Z-Score Normalization"}]},{"n":"/study-curation-guide","l":"Study Curation Guide"},{"n":"/data-loading-maintaining-studies","l":"Maintaining Studies"},{"n":"/data-loading-for-developers","l":"For Developers"},{"n":"/data-loading-tips-and-best-practices","l":"Tips and Best Practices"},{"n":"/mutation-data-transcript-annotation","l":"Mutation Data Transcript Annotation"},{"n":"/import-oncokb-annotations-as-custom-driver-annotations","l":"Import OncoKB annotations as custom driver annotations"},{"n":"/import-gene-sets","l":"Importing Gene Sets for Gene Set Scoring Data"},{"n":"/import-gene-panels","l":"Importing Gene Panels"},{"n":"/deployment/customization/studyview","l":"Study View Customization with Priority Data"}]},{"n":"/cbioportal-maintenance","l":"cBioPortal Maintenance","c":false,"i":[{"n":"/updating-your-cbioportal-installation","l":"Updating your cBioPortal Database Scheme"},{"n":"/updating-gene-and-gene_alias-tables","l":"Update genes and gene aliases"},{"n":"/migration-guide","l":"Migration Guide"},{"n":"/msk-maintenance","l":"MSK Maintenance"}]}]},{"n":"/development","l":"Development","i":[{"n":"/development/feature-development-guide","l":"Feature Development Guide"},{"n":"/development/cbioportal-er-diagram","l":"cBioPortal Entity-relationship Diagram"},{"n":"/development/database-versioning","l":"cBioPortal Database Versioning"},{"n":"/development/build-different-frontend","l":"Build cBioPortal with a different frontend version"},{"n":"/development/session-service-working","l":"Session 
Service"},{"n":"/development/manual-test-cases","l":"Manual test cases"},{"n":"/development/release-procedure","l":"Release Procedure"},{"n":"/development/deployment-procedure","l":"Deployment Procedure"},{"n":"/development/documentation-site","l":"This documentation site"}]}],"search":{"mode":0,"minChars":2,"maxResults":20,"placeholder":"Search","hotkeys":["k"],"noResultsFoundMsg":"Sorry, no results found.","recognizeLanguages":true,"languages":[0],"preload":false},"resources":{"History_Title_Label":"History","History_ClearLink_Label":"Clear","History_NoHistory_Label":"No history items","API_AccessFilter_Label":"Access","API_ParameterSection_Label":"PARAMETERS","API_SignatureSection_Label":"SIGNATURE","API_CopyHint_Label":"Copy","API_CopyNameHint_Label":"Copy name","API_CopyLinkHint_Label":"Copy link","API_CopiedAckHint_Label":"Copied!","API_MoreOverloads_Label":"more","API_MoreDropdownItems_Label":"More","API_OptionalParameter_Label":"optional","API_DefaultParameterValue_Label":"Default value","API_InheritedFilter_Label":"Inherited","Search_Input_Placeholder":"Search","Toc_Contents_Label":"Contents","Toc_RelatedClasses_Label":"Related Classes","History_JustNowTime_Label":"just now","History_AgoTime_Label":"ago","History_YearTime_Label":"y","History_MonthTime_Label":"mo","History_DayTime_Label":"d","History_HourTime_Label":"h","History_MinuteTime_Label":"m","History_SecondTime_Label":"s"}};
diff --git a/resources/js/search.json b/resources/js/search.json
index 1f3235ed45f..f4443fa197d 100644
--- a/resources/js/search.json
+++ b/resources/js/search.json
@@ -1 +1 @@
-[[{"i":"welcome-to-the-documentation-for-cbioportal","l":"Welcome to the documentation for cBioPortal!","p":["We've organized this site by the different roles in the cBioPortal community.","cBioPortal Google Group","We maintain an active list of RFCs (Requests for Comments) where we describe new features and solicit community feedback.","FAQ","Tutorials","API documentation"]}],[{"l":"About Us","p":["The cBioPortal for Cancer Genomics was originally developed at Memorial Sloan Kettering Cancer Center(MSK). The public cBioPortal site is hosted by the Center for Molecular Oncology at MSK. The cBioPortal software is now available under an open source license via GitHub. The software is now developed and maintained by a multi-institutional team, consisting of MSK, the Dana Farber Cancer Institute, Princess Margaret Cancer Centre in Toronto, Children's Hospital of Philadelphia, Caris Life Sciences, The Hyve and SE4BIO in the Netherlands, and Bilkent University in Ankara, Turkey."]},{"l":"Memorial Sloan Kettering Cancer Center","p":["Aaron Lisman","Angelica Ochoa","Anusha Satravada","Avery Wang","Benjamin Gross","Bryan Lai","Calla Chennault","Gaofei Zhao","Hongxin Zhang","Ino de Bruijn","Manda Wilson","Nikolaus Schultz","Ramyasree Madupuri","Rima AlHamad","Ritika Kundra","Robert Sheridan","S Onur Sumer","Xiang Li"]},{"l":"Dana-Farber Cancer Institute","p":["Ethan Cerami","Tali Mazor","Jeremy Easton-Marks","Zhaoyuan (Ryan) Fu","Augustin Luna","James Lindsay","Chris Sander"]},{"i":"princess-margaret-cancer-centre-toronto","l":"Princess Margaret Cancer Centre, Toronto","p":["Prasanna Jagannathan","Trevor Pugh"]},{"i":"childrens-hospital-of-philadelphia","l":"Children's Hospital of Philadelphia","p":["Charles Haynes","David Higgins","Allison Heath","John Maris","Adam Resnick","Miguel Brown"]},{"l":"Caris Life Sciences","p":["Jianjiong Gao","Priti Kumari","Karthik Kalletla"]},{"l":"The Hyve","p":["Oleguer Plantalech","Pim van Nierop","Sander Rodenburg","Bas Leenknegt","Elena 
G Lara","Jessica Singh","Matthijs Pon","Tim Kuijpers","Mirella Kalafati","Sjoerd van Hagen"]},{"l":"SE4BIO","p":["Pieter Lukasse","Ruslan Forostianov"]},{"l":"Bilkent University","p":["Ugur Dogrusoz","Yusuf Ziya Ozgul"]},{"l":"Alumni","p":["Adam Abeshouse","Alexandros Sigaras","Anders Jacobsen","Andy Dufilie","Arthur Goldberg","B Arman Aksoy","Caitlin Byrne","Catherine Del Vecchio Fitz","Diana Baiceanu","Dionne Zaal","Divya Madala","Dong Li","Erik Larsson","Ersin Ciftci","Fedde Schaeffer","Fred Criscuolo","Gideon Dresdner","Hsiao-Wei Chen","Irina Pulyakhina","Istemi Bahceci","James Xu","Jiaojiao Wang","Jing Su","Kaan Sancak","Kees van Bochove","Kelsey Zhu","Leonard Dervishi","Luke Sikina","M Furkan Sahin","M Salih Altun","Michael Heuer","Ngoc Nguyen","Olivier Elemento","Paul van Dijk","Peter Kok","Pichai Raman","Riza Nugraha","Sander Tan","Stuart Watt","Tamba Monrose","Yichao Sun","Zachary Heins","Ziya Erkoc"]},{"i":"funding-for-the-cbioportal-for-cancer-genomics-is-or-has-been-provided-by","l":"Funding for the cBioPortal for Cancer Genomics is or has been provided by:"},{"i":"current","l":"Current:","p":["NCI, through ITCR grant NCI-U24CA274633 and HTAN grant NCI-U24CA233243","Marie-José and Henry R. 
Kravis Center for Molecular Oncology at MSK","Dana Farber Cancer Institute","American Association for Cancer Research through AACR Project GENIE","Prostate Cancer Foundation","The Cholangiocarcinoma Foundation","Robertson Foundation"]},{"i":"past","l":"Past:","p":["NCI, through ITCR grant NCI-U24CA220457","Stand Up 2 Cancer","The Ben & Catherine Ivy Foundation","NCI, as a TCGA Genome Data Analysis Center (GDAC)(NCI-U24CA143840)","NCRR, as the National Resource for Network Biology (NRNB) Research Resource (RR 031228-02)","Starr Cancer Consortium","Breast Cancer Research Foundation","Adenoid Cystic Carcinoma Research Foundation","POETIC Consortium","Parker Institute for Cancer Immunotherapy"]}],[{"l":"List of Active RFCs","p":["We maintain an active set of RFCs (Requests for Comments) where we spec out new features and solicit community feedback.","See this shared google folder for the list of RFCs."]},{"l":"For Developers Creating new RFCs","p":["Use the RFC Template","Create your RFC within this shared google folder, and pick a new incremental number.","Add a Link to your RFC on this page."]}],[{"l":"User Guide","p":["The cBioPortal for Cancer Genomics is a resource for interactive exploration of multidimensional cancer genomics data sets. The goal of cBioPortal is to significantly lower the barriers between complex genomic data and cancer researchers by providing rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects, and therefore to empower researchers to translate these rich data sets into biologic insights and clinical applications.","The following section guides users in performing queries and analysis on any instance of cBioportal."]}],[{"l":"New Users","p":["Are you new to cBioPortal? Welcome! 
We have a few options to help you get started.","If you have an hour, we highly recommend watching the recording of our Introduction to cBioPortal webinar ( youtube.com or bilibili.com).","Don't have an hour? Review our tutorial slides for exploring a study ( Google slides or PDF) and running a query ( Google slides or PDF)","Or, watch two of our short how-to videos which demonstrate how to explore a study ( youtube.com) and how to run a query ( youtube.com)."]}],[{"l":"cBioPortal FAQs","p":["Analysis Questions","Are there any normal tissue samples available through cBioPortal?","Can I change the order of genes in the OncoPrint?","Can I create a local instance of cBioPortal to host my own data?","Can I download all data at once?","Can I save or bookmark my results in cBioPortal?","Can I use cBioPortal with my own data?","Can I use figures from the cBioPortal in my publications or presentations?","Can I visualize my own data within an OncoPrint?","Clinical Data","Data Questions","DNA (Mutations, Copy Number & Fusions)","DNA Methylation","Does the cBioPortal contain synonymous mutation data?","Does the cBioPortal provide a Web Service API? R interface? 
MATLAB interface?","Does the Mutual Exclusivity tab calculate its statistics using all samples/alterations or only a specific subset?","Does the portal contain cancer study X?","Does the portal store raw or probe-level data?","Does the portal work on all browsers and operating systems?","General Data","General Questions","How are protein domains in the mutational lollipop diagrams specified?","How can I compare outcomes in patients with high vs low expression of a gene?","How can I compare two or more subsets of samples?","How can I create a subset or sub cohort of a study with specific samples or patients?","How can I download the PanCancer Atlas data?","How can I find which studies have mRNA expression data (or any other specific data type)?","How can I form a combined Study?","How can I query microRNAs in the portal?","How can I query over/under expression of a gene?","How can I query phosphoprotein levels in the portal?","How can I query/explore a select subset of samples?","How do I access data from AACR Project GENIE?","How do I cite the cBioPortal?","How do I get started?","How do I get updates on new portal developments and new data sets?","How does cBioPortal handle duplicate samples or sample IDs across different studies?","How does TCGA data in cBioPortal compare to TCGA data in Genome Data Commons?","How is TCGA RNASeqV2 processed? What units are used?","How is the cBioPortal for Cancer Genomics different from the Genomic Data Commons (GDC)?","How to use filter in the URL of Study View page?","I'd like to contribute code to the cBioPortal. How do I get started?","Is it necessary to log in to use virtual studies? If I do log in, what additional functionality do I gain?","Is it possible to determine if a particular mutation is heterozygous or homozygous in a sample? 
When a sample has 2 mutations in one gene, is it possible to determine whether the mutations are in cis or in trans with each other?","Is there any normal RNA-seq data in cBioPortal?","Is there microRNA data?","OncoPrint","Other pages","Protein","Results View","RNA","Study View","TCGA","The data today is different than the last time i looked. What happened?","What are mRNA and microRNA Z-Scores?","What are OncoPrints?","What are TCGA Firehose Legacy datasets and how do they compare to the publication-associated datasets and the PanCancer Atlas datasets?","What are the statistical significance tests in Group Comparison?","What are the values of the box and whiskers in a boxplot?","What data types are in the portal?","What do “Amplification”, “Gain”, “Deep Deletion”, “Shallow Deletion” and \"-2\", \"-1\", \"0\", \"1\", and \"2\" mean in the copy-number data?","What does ___ stand for?","What happened to TCGA Provisional datasets?","What if I have other questions or comments?","What is a combined Study?","What is a Virtual Study?","What is GISTIC? What is RAE?","What is Group Comparison?","What is the cBioPortal for Cancer Genomics?","What is the difference between a “splice site” mutation and a “splice region” mutation?","What is the meaning of OS_STATUS / OS_MONTHS, and PFS_STATUS / PFS_MONTHS?","What is the process of data curation?","What kind of clinical data is stored in the portal?","What processing or filtering is applied to generate the mutation data?","What transcripts are used for annotating mutations?","What version of the human reference genome is being used in cBioPortal?","Where do the thresholded copy number call in TCGA Firehose Legacy data come from?","Which methylation probe is used for genes with multiple probes?","Which resources are integrated for variant annotation?","Which studies have MutSig and GISTIC results? 
How do these results compare to the data in the TCGA publications?","Why are some samples “Not profiled” for certain genes?","Why isn’t there protein data for my gene of interest?"]},{"l":"General Questions"},{"i":"what-is-the-cbioportal-for-cancer-genomics","l":"What is the cBioPortal for Cancer Genomics?","p":["The cBioPortal for Cancer Genomics is an open-access, open-source resource for interactive exploration of multidimensional cancer genomics data sets. The goal of cBioPortal is to significantly lower the barriers between complex genomic data and cancer researchers by providing rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects, and therefore to empower researchers to translate these rich data sets into biologic insights and clinical applications."]},{"i":"how-do-i-get-started","l":"How do I get started?","p":["Check out our tutorial slides to get started or go through our tutorial paper."]},{"i":"what-data-types-are-in-the-portal","l":"What data types are in the portal?","p":["The portal supports and stores non-synonymous mutations, DNA copy-number data (putative, discrete values per gene, e.g. \"deeply deleted\" or \"amplified\", as well as log2 or linear copy number data), mRNA and microRNA expression data, protein-level and phosphoprotein level data (RPPA or mass spectrometry based), DNA methylation data, and de-identified clinical data. For a complete breakdown of available data types per cancer study go to the Data Sets Page. Note that for many studies, only somatic mutation data and limited clinical data are available. For TCGA studies, the other data types are also available. 
Germline mutations are supported by cBioPortal, but are, with a few exceptions, not available in the public instance."]},{"i":"what-does-___-stand-for","l":"What does ___ stand for?","p":["Here are the meanings of some of the abbreviations used by cBioPortal:","VUS: variant of unknown significance","CNA: copy number alteration","AMP: amplification","HOMDEL: deep deletion","TMB: tumor mutational burden, calculated as mutations per megabase of sequenced DNA","KM: Kaplan-Meier","MSI: microsatellite instability","OQL: Onco Query Language, used within cBioPortal to define the types of alterations included in a query. For more on OQL, review the documentation, tutorial slides, and videos"]},{"i":"what-is-the-process-of-data-curation","l":"What is the process of data curation?","p":["The TCGA firehose legacy datasets are imported directly from the original TCGA Data Coordinating Center via the Broad Firehose.","We are also actively curating datasets from the literature. Studies from the literature were curated from the data published with the manuscripts. We sometimes reach out to the investigators to acquire additional data, such as clinical attributes. All mutation calls (in VCF or MAF format) are processed through an internal pipeline to annotate the variant effects in a consistent way across studies. Please contact us to suggest additional public datasets to curate or view the list of studies suggested for curation in our Datahub on Github."]},{"i":"how-do-i-get-updates-on-new-portal-developments-and-new-data-sets","l":"How do I get updates on new portal developments and new data sets?","p":["Please subscribe to our low-volume news mailing list or follow @cbioportal on Twitter."]},{"i":"does-the-portal-work-on-all-browsers-and-operating-systems","l":"Does the portal work on all browsers and operating systems?","p":["We support and test on the following web browsers: Safari, Google Chrome, Firefox and Edge. 
(As of release v3.5.4 we no longer support Internet Explorer 11). If you notice any incompatibilities, please let us know."]},{"i":"how-do-i-cite-the-cbioportal","l":"How do I cite the cBioPortal?","p":["Please cite the following portal papers:","Cerami et al. The cBio Cancer Genomics Portal: An Open Platform for Exploring Multidimensional Cancer Genomics Data. Cancer Discovery. May 2012 2; 401. PubMed.","Gao et al. Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. Sci. Signal. 6, pl1 (2013). PubMed.","de Bruijn et al. Analysis and Visualization of Longitudinal Genomic and Clinical Data from the AACR Project GENIE Biopharma Collaborative in cBioPortal. Cancer Res (2023). PubMed.","Remember also to cite the source of the data if you are using a publicly available dataset."]},{"i":"can-i-use-figures-from-the-cbioportal-in-my-publications-or-presentations","l":"Can I use figures from the cBioPortal in my publications or presentations?","p":["Yes, you are free to use any of the figures from the portal in your publications or presentations (many are available in SVG or PDF format for easier scaling and editing). When you do, please cite Cerami et al., Cancer Discov. 2012, and Gao et al., Sci. Signal. 2013 (see the previous question for full citations)."]},{"i":"can-i-save-or-bookmark-my-results-in-cbioportal","l":"Can I save or bookmark my results in cBioPortal?","p":["You can bookmark your query results and share the URL with collaborators. We store all queries via Session IDs, and these are saved indefinitely. 
Use the bookmark tab to retrieve the full link, or generate a short link via the bit.ly link generator."]},{"i":"how-is-the-cbioportal-for-cancer-genomics-different-from-the-genomic-data-commons-gdc","l":"How is the cBioPortal for Cancer Genomics different from the Genomic Data Commons (GDC)?","p":["The cBioPortal is an exploratory analysis tool for exploring large-scale cancer genomic data sets that hosts data from large consortium efforts, like TCGA and TARGET, as well as publications from individual labs. You can quickly view genomic alterations across a set of patients, across a set of cancer types, perform survival analysis and perform group comparisons. If you want to explore specific genes or a pathway of interest in one or more cancer types, the cBioPortal is probably where you want to start.","By contrast, the Genomic Data Commons (GDC) aims to be the definitive place for full-download and access to all data generated by TCGA and TARGET. If you want to download raw mRNA expression files or full segmented copy number files, the GDC is probably where you want to start."]},{"i":"does-the-cbioportal-provide-a-web-service-api-r-interface-matlab-interface","l":"Does the cBioPortal provide a Web Service API? R interface? MATLAB interface?","p":["Yes, the cBioPortal provides a Swagger API, and R/MATLAB interfaces."]},{"i":"can-i-use-cbioportal-with-my-own-data","l":"Can I use cBioPortal with my own data?","p":["cBioPortal provides several options for analyzing your own data. Visit our Visualize Your Data page to generate an OncoPrint or Lollipop Plot with your own data. To utilize the entire suite of analysis and visualization tools, you can also install your own instance of cBioPortal (see next question)."]},{"i":"can-i-create-a-local-instance-of-cbioportal-to-host-my-own-data","l":"Can I create a local instance of cBioPortal to host my own data?","p":["Yes, the cBioPortal is open-source, and available on GitHub. 
Our documentation provides complete download and installation instructions."]},{"i":"id-like-to-contribute-code-to-the-cbioportal-how-do-i-get-started","l":"I'd like to contribute code to the cBioPortal. How do I get started?","p":["Great! We would love to have your contributions. To get started, head over to our GitHub repository and check out our page on how to contribute."]},{"i":"what-is-a-combined-study","l":"What is a combined Study?","p":["A combined study is a custom study comprised of samples from multiple studies. The combined study feature enables you to combine samples from multiple studies to form a bigger study. This cohort of samples can then be queried or explored just like a traditional study, and can be returned to at a later date or shared with a collaborator."]},{"i":"how-can-i-form-a-combined-study","l":"How can I form a combined Study?","p":["A combined or merged study is a custom study comprised of samples from multiple studies. In the homepage of cbioportal, studies can be selected using the checkbox located on the left of the study. Once the studies are selected, they can be combined and explored using the \"Explore Selected Studies\" button. Alternatively, after the studies are selected, you can run queries on the combined study using the \"Query by Gene\" button."]},{"i":"how-can-i-create-a-subset-or-sub-cohort-of-a-study-with-specific-samples-or-patients","l":"How can I create a subset or sub cohort of a study with specific samples or patients?","p":["A subset or sub cohort of a study can be created by specifying individual patients or samples. After a study is selected, user can click on the \"Custom selection\" button to create a new filter by specifying the sampleID or patientID that the user is interested to explore. 
Another way is to filter a set of patients using the charts on the study view and then view the IDs of the patients and samples that were selected or unselected based on the current filter."]},{"i":"what-is-a-virtual-study","l":"What is a Virtual Study?","p":["A virtual study is a custom study comprised of samples from one or more existing studies. The virtual study feature allows you to define a custom cohort of samples that fit your specific genomic or clinical criteria of interest. These samples can be a subset of the data available in an existing study, or result from the combination of multiple existing studies. This cohort of samples can then be queried or explored just like a traditional study, and can be returned to at a later date or shared with a collaborator. For more information and examples, see our tutorial on virtual studies."]},{"i":"is-it-necessary-to-log-in-to-use-virtual-studies-if-i-do-log-in-what-additional-functionality-do-i-gain","l":"Is it necessary to log in to use virtual studies? If I do log in, what additional functionality do I gain?","p":["No. A user that has not logged in can create virtual studies and run queries in those studies (by using the query box on the study summary page). Links to virtual studies are permanent, so you can save the link on your computer and come back to it anytime, or share it with others.","If you log in, you gain the ability to save your virtual study to the list of existing studies on the homepage. This makes a virtual study functionally the same as any other study: you can access your virtual studies in the query builder and you can combine an existing virtual study with any other study to create a new virtual study."]},{"i":"what-is-group-comparison","l":"What is Group Comparison?","p":["Group Comparison is a suite of analysis features which allows a user to compare clinical or genomic features of user-defined groups of samples. These groups can be defined based on any clinical or genomic features. 
For an overview, see our tutorial on group comparison."]},{"i":"what-are-the-statistical-significance-tests-in-group-comparison","l":"What are the statistical significance tests in Group Comparison?","p":["Survival tab: Log-rank test","Clinical tab:","Continuous data: Kruskal Wallis test","Categorical data: Chi-squared test","Other tabs","2 groups","Continuous data: one-sided t-test","Categorical data: two-sided Fisher's exact test","3 or more groups","Continuous data: one-way ANOVA","Categorical data: Chi-squared test"]},{"l":"Data Questions"},{"l":"General Data"},{"i":"does-the-portal-contain-cancer-study-x","l":"Does the portal contain cancer study X?","p":["Check out the Data Sets Page for the complete set of cancer studies currently stored in the portal. If you do not see your specific cancer study of interest, please contact us, and we will let you know if it's in the queue."]},{"i":"which-resources-are-integrated-for-variant-annotation","l":"Which resources are integrated for variant annotation?","p":["cBioPortal supports the annotation of variants from several different databases. These databases provide information about the recurrence of, or prior knowledge about, specific amino acid changes. For each variant, the number of occurrences of mutations at the same amino acid position present in the COSMIC database is reported. Furthermore, variants are annotated as “hotspots” if the amino acid positions were found to be recurrent linear hotspots, as defined by the Cancer Hotspots method ( cancerhotspots.org), or three-dimensional hotspots, as defined by 3D Hotspots ( 3dhotspots.org). Prior knowledge about variants, including clinical actionability information, is provided from three different sources: OncoKB ( www.oncokb.org), CIViC ( civicdb.org), as well as My Cancer Genome ( mycancergenome.org). 
For OncoKB, exact levels of clinical actionability are displayed in cBioPortal, as defined by the OncoKB paper."]},{"i":"what-version-of-the-human-reference-genome-is-being-used-in-cbioportal","l":"What version of the human reference genome is being used in cBioPortal?","p":["The public cBioPortal is currently using hg19/GRCh37."]},{"i":"how-does-cbioportal-handle-duplicate-samples-or-sample-ids-across-different-studies","l":"How does cBioPortal handle duplicate samples or sample IDs across different studies?","p":["The cBioPortal generally assumes that samples or patients that have the same ID are actually the same. This is important for cross-cancer queries, where each sample should only be counted once. If a sample is part of multiple cancer cohorts, its alterations are only counted once in the Mutations tab (it will be listed multiple times in the table, but is only counted once in the lollipop plot). However, other tabs (including OncoPrint and Cancer Types Summary) will count the sample twice - for this reason, we advise against querying multiple studies that contain the same samples (e.g., TCGA PanCancer Atlas and TCGA Firehose Legacy)."]},{"i":"are-there-any-normal-tissue-samples-available-through-cbioportal","l":"Are there any normal tissue samples available through cBioPortal?","p":["No, we currently do not store any normal tissue data in our system."]},{"i":"how-can-i-find-which-studies-have-mrna-expression-data-or-any-other-specific-data-type","l":"How can I find which studies have mRNA expression data (or any other specific data type)?","p":["Check out the Data Sets Page where you can view the complete set of cancer studies and sort by the number of samples with data available for any data type."]},{"i":"can-i-download-all-data-at-once","l":"Can I download all data at once?","p":["You can download all data for individual studies on the Data Sets Page or the study view page for the study of interest. 
You can also download all studies from our Data Hub."]},{"i":"the-data-today-is-different-than-the-last-time-i-looked-what-happened","l":"The data today is different than the last time I looked. What happened?","p":["We do occasionally update existing datasets to provide the most up-to-date, accurate and consistent data possible. The data you see today is likely an improved version of what you have seen previously. However, if you suspect that there is an error in the current version, please let us know at cbioportal@googlegroups.com.","If you need to reference an old version of a dataset, you can find previous versions in our Datahub repository."]},{"i":"how-do-i-access-data-from-aacr-project-genie","l":"How do I access data from AACR Project GENIE?","p":["Data from AACR Project GENIE are provided in a dedicated instance of cBioPortal. You can also download GENIE data from the Synapse Platform. Note that you will need to register before accessing the data. Additional information about AACR Project GENIE can be found on the AACR website."]},{"l":"TCGA"},{"i":"how-does-tcga-data-in-cbioportal-compare-to-tcga-data-in-genome-data-commons","l":"How does TCGA data in cBioPortal compare to TCGA data in Genome Data Commons?","p":["We do not currently load the mutation data from the GDC. Instead, we have the original mutation data generated by the individual TCGA sequencing centers. The source of the data is the Broad Firehose (or the publication pages for data that matches a specific manuscript). These data are usually a combination of two mutation callers, but they differ by center (typically a variant caller like MuTect plus an indel caller), and sequencing centers have modified their mutation calling pipelines over time."]},{"i":"what-happened-to-tcga-provisional-datasets","l":"What happened to TCGA Provisional datasets?","p":["We renamed TCGA Provisional datasets to TCGA Firehose Legacy to better reflect that this data comes from a legacy processing pipeline. 
The exact same data is now available in TCGA Firehose Legacy studies."]},{"i":"what-are-tcga-firehose-legacy-datasets-and-how-do-they-compare-to-the-publication-associated-datasets-and-the-pancancer-atlas-datasets","l":"What are TCGA Firehose Legacy datasets and how do they compare to the publication-associated datasets and the PanCancer Atlas datasets?","p":["The Firehose Legacy dataset (formerly Provisional datasets) for each TCGA cancer type contains all data available from the Broad Firehose. The publication datasets reflect the data that were used for each of the publications. The samples in a published dataset are usually a subset of the firehose legacy dataset, since manuscripts were often written before TCGA completed their goal of sequencing 500 tumors.","There can be differences between firehose legacy and published data. For example, the mutation data in the publication usually underwent more QC, and false positives might have been removed or, in rare cases, false negatives added. RNA-Seq and copy-number values may also differ slightly, as different versions of analysis pipelines could have been used. Additionally, due to additional curation during the publication process, the clinical data for the publication may be of higher quality or may contain a few more data elements, sometimes derived from the genomic data (e.g., genomic subtypes).","The TCGA PanCancer Atlas datasets derive from an effort to unify TCGA data across all tumor types. Publications resulting from this effort can be found at the TCGA PanCancer Atlas site. 
In the cBioPortal, data from the PanCancer Atlas is divided by tumor type, but these studies have uniform clinical elements, consistent processing and normalization of mutations, copy number, mRNA data and are ideally processed for comparative analyses."]},{"i":"where-do-the-thresholded-copy-number-call-in-tcga-firehose-legacy-data-come-from","l":"Where do the thresholded copy number calls in TCGA Firehose Legacy data come from?","p":["Thresholded copy number calls in the TCGA Firehose Legacy datasets are generated by the GISTIC 2.0 algorithm and obtained from the Broad Firehose."]},{"i":"which-studies-have-mutsig-and-gistic-results-how-do-these-results-compare-to-the-data-in-the-tcga-publications","l":"Which studies have MutSig and GISTIC results? How do these results compare to the data in the TCGA publications?","p":["MutSig and GISTIC results about the statistical significance of recurrence of mutations and copy-number alterations in specific genes are available for many TCGA studies. The MutSig and GISTIC results reported in cBioPortal are based on the same mutations and copy number data reported in each TCGA publication, or the Broad Firehose for the firehose legacy data sets. However, the publication may or may not have included the complete MutSig and GISTIC output, and therefore there may be some discrepancies between the publication and the data in cBioPortal."]},{"i":"how-can-i-download-the-pancancer-atlas-data","l":"How can I download the PanCancer Atlas data?","p":["PanCancer Atlas data can be downloaded on a study-by-study basis from cBioPortal through the Datasets page or our DataHub. 
To download all cancer types together, try the Genomic Data Commons PanCancer Atlas page."]},{"i":"dna-mutations-copy-number--fusions","l":"DNA (Mutations, Copy Number & Fusions)"},{"i":"does-the-cbioportal-contain-synonymous-mutation-data","l":"Does the cBioPortal contain synonymous mutation data?","p":["No, the cBioPortal does not currently support synonymous mutations. This may change in the future, but we have no plans yet to add this feature."]},{"i":"what-processing-or-filtering-is-applied-to-generate-the-mutation-data","l":"What processing or filtering is applied to generate the mutation data?","p":["Within cBioPortal, we utilize the mutation calls as provided by each publication. We do not perform any additional filtering. The only processing we do is to standardize the annotation of the mutations using Genome Nexus(which utilizes VEP with the canonical MSKCC transcript). Read more about the transcript assignments here. For specifics of which tools were used to call mutations and filters that may have been applied, refer to the publication manuscript."]},{"i":"what-transcripts-are-used-for-annotating-mutations","l":"What transcripts are used for annotating mutations?","p":["Prior to loading a study into cBioPortal, we run all mutation data through a standard pipeline (see above), which re-annotates all mutations to the canonical MSKCC transcript. Read more about the transcript assignments here."]},{"i":"how-are-protein-domains-in-the-mutational-lollipop-diagrams-specified","l":"How are protein domains in the mutational lollipop diagrams specified?","p":["Protein domain definitions come from PFAM."]},{"i":"what-is-the-difference-between-a-splice-site-mutation-and-a-splice-region-mutation","l":"What is the difference between a “splice site” mutation and a “splice region” mutation?","p":["A “splice site” mutation occurs in an intron, in a splice acceptor or donor site (2bp into an intron adjacent to the intron/exon junction), defined by Sequence Ontology. 
“Splice region” mutations are mutations that occur near the intron/exon junction, defined by Sequence Ontology. While synonymous mutations are generally excluded from cBioPortal, these “splice region” synonymous mutations are included due to their potential impact on splicing."]},{"i":"what-do-amplification-gain-deep-deletion-shallow-deletion-and--2--1-0-1-and-2-mean-in-the-copy-number-data","l":"What do “Amplification”, “Gain”, “Deep Deletion”, “Shallow Deletion” and \"-2\", \"-1\", \"0\", \"1\", and \"2\" mean in the copy-number data?","p":["These levels are derived from copy-number analysis algorithms like GISTIC or RAE, and indicate the copy-number level per gene:","-2 or Deep Deletion indicates a deep loss, possibly a homozygous deletion","-1 or Shallow Deletion indicates a shallow loss, possibly a heterozygous deletion","0 is diploid","1 or Gain indicates a low-level gain (a few additional copies, often broad)","2 or Amplification indicates a high-level amplification (more copies, often focal)","Note that these calls are putative. We consider the deep deletions and amplifications as biologically relevant for individual genes by default. Note that these calls are usually not manually reviewed, and due to differences in purity and ploidy between samples, there may be false positives and false negatives."]},{"i":"what-is-gistic-what-is-rae","l":"What is GISTIC? What is RAE?","p":["Copy number data sets within the portal are often generated by the GISTIC or RAE algorithms. Both algorithms attempt to identify significantly altered regions of amplification or deletion across sets of patients. 
Both algorithms also generate putative gene/patient copy number specific calls, which are then input into the portal.","For TCGA studies, the table in allthresholded.bygenes.txt (which is the part of the GISTIC output that is used to determine the copy-number status of each gene in each sample in cBioPortal) is obtained by applying both low- and high-level thresholds to the gene copy levels of all the samples. The entries with value +/- 2 exceed the high-level thresholds for amplifications/deep deletions, and those with +/- 1 exceed the low-level thresholds but not the high-level thresholds. The low-level thresholds are just the 'ampthresh' and 'delthresh' noise threshold input values to GISTIC (typically 0.1 or 0.3) and are the same for every sample.","By contrast, the high-level thresholds are calculated on a sample-by-sample basis and are based on the maximum (or minimum) median arm-level amplification (or deletion) copy number found in the sample. The idea, for deletions anyway, is that this level is a good approximation for hemizygous losses given the purity and ploidy of the sample. The actual cutoffs used for each sample can be found in a table in the output file sample_cutoffs.txt. All GISTIC output files for TCGA are available at: gdac.broadinstitute.org."]},{"l":"RNA"},{"i":"does-the-portal-store-raw-or-probe-level-data","l":"Does the portal store raw or probe-level data?","p":["No, the portal only contains gene-level data. Data for different isoforms of a given gene are merged. Raw and probe-level data for data sets are available via NCBI GEO, dbGaP or through the GDC. 
See the cancer type description on the main query page or refer to the original publication for links to the raw data."]},{"i":"what-are-mrna-and-microrna-z-scores","l":"What are mRNA and microRNA Z-Scores?","p":["For mRNA and microRNA expression data, we typically compute the relative expression of an individual gene in a tumor sample to the gene's expression distribution in a reference population of samples. That reference population is all profiled samples (by default for mRNA), or normal samples (when specified), or all samples that are diploid for the gene in question (discontinued). The returned value indicates the number of standard deviations away from the mean of expression in the reference population (Z-score). The normalization method is described here. Please note that the expression results by querying a gene with the default setting (z-score threshold of 2) oftentimes are not meaningful. Since the z-scores were usually calculated compared to other tumor samples, high or low expression does not necessarily mean that the gene is expressed irregularly in tumors. The data is useful for correlation analysis, for example, pick a threshold based on overall expression (using Plots tab) and compare survival data between expression high and low groups."]},{"i":"is-there-any-normal-rna-seq-data-in-cbioportal","l":"Is there any normal RNA-seq data in cBioPortal?","p":["We have RNASeqV2 mRNA expression data for normal samples of 16 TCGA PanCan Atlas Cohorts. The data was curated from GDC, and can be downloaded from our Datahub or Data Set page. This data is not directly queryable in the portal; it is only used as reference data for calculating the \"relative to normal expression z-score\" profile. Example: ERBB2 expression z-scores relative to normal expression."]},{"i":"how-is-tcga-rnaseqv2-processed-what-units-are-used","l":"How is TCGA RNASeqV2 processed? What units are used?","p":["RNASeqV2 from TCGA is processed and normalized using RSEM. 
Specifically, the RNASeq V2 data in cBioPortal corresponds to the rsem.genes.normalized_results file from TCGA. A more detailed explanation of RSEM output can be found here. cBioPortal then calculates z-scores as described above in What are mRNA and microRNA Z-Scores?"]},{"i":"is-there-microrna-data","l":"Is there microRNA data?","p":["We have microRNA data for only a few studies and they are not up to date. To download more updated miRNA data, please go to either Broad Firehose, or GDC."]},{"i":"how-can-i-query-micrornas-in-the-portal","l":"How can I query microRNAs in the portal?","p":["You can input either precursor or mature miRNA IDs. Since one precursor ID may correspond to multiple mature IDs and vice versa, the portal creates one internal ID for each pair of precursor ID and mature ID mapping. For example, an internal ID of MIR-29B-1/29B stands for precursor microRNA hsa-mir-29b-1 and mature microRNA hsa-miR-29b. After entering a precursor or mature ID, you will be asked to select one internal ID for query and that internal ID will also be displayed in the Oncoprint."]},{"l":"Protein"},{"i":"how-can-i-query-phosphoprotein-levels-in-the-portal","l":"How can I query phosphoprotein levels in the portal?","p":["You need to input special IDs for each phosphoprotein/phosphosite such as AKT1_pS473 (which means AKT1 protein phosphorylated at serine residue at position 473). You could also input aliases such as phosphoAKT1 or phosphoprotein, and the portal will ask you to select the phosphoprotein/phosphosite of your interest. Note that phosphoprotein data is only available for select studies and for a limited number of proteins / phosphorylation sites."]},{"i":"why-isnt-there-protein-data-for-my-gene-of-interest","l":"Why isn’t there protein data for my gene of interest?","p":["Most of the protein expression data in cBioPortal comes from assays like RPPA which only interrogate a subset of all proteins. 
TCGA ovarian, breast, and colorectal firehose legacy studies also have mass-spectrometry-based proteomics data from CPTAC which cover more genes/proteins."]},{"l":"DNA Methylation"},{"i":"which-methylation-probe-is-used-for-genes-with-multiple-probes","l":"Which methylation probe is used for genes with multiple probes?","p":["For genes with multiple probes (usually from the Infinium arrays), we only include methylation data from the probe with the strongest negative correlation between the methylation signal and the gene's expression in the study (TCGA only)."]},{"l":"Clinical Data"},{"i":"what-kind-of-clinical-data-is-stored-in-the-portal","l":"What kind of clinical data is stored in the portal?","p":["The portal currently stores de-identified clinical data, such as gender, age, tumor type, tumor grade, overall and disease-free survival data, when available. The available clinical data will differ from study to study."]},{"i":"what-is-the-meaning-of-os_status--os_months-and-pfs_status--pfs_months","l":"What is the meaning of OS_STATUS / OS_MONTHS, and PFS_STATUS / PFS_MONTHS?","p":["OS_STATUS means overall survival status (\"0\" -> \"living\" or \"1\" -> \"deceased\") and OS_MONTHS indicates the number of months from time of diagnosis to time of death or last follow up. PFS refers to “progression free survival”, indicating whether patient’s disease has recurred/progressed (PFS_STATUS), and at what time the disease recurred or the patient was last seen (PFS_MONTHS)."]},{"l":"Analysis Questions"},{"i":"how-can-i-queryexplore-a-select-subset-of-samples","l":"How can I query/explore a select subset of samples?","p":["cBioPortal allows you to run a query or explore study view using a user-specified list of samples/patients.","The first step is to define your sample set. 
There are two slightly different approaches you can take to defining your sample set, depending on whether you are selecting based on a positive criteria (samples with TP53 mutations) or a negative criteria (samples without a KRAS mutation).","Let’s take the positive criteria example first. Run a query for TP53 mutations using OQL (TP53: MUT) in your study of interest. Click over to the “Download” tab. In the table at the top, find the row that starts with “Samples affected”, and either Copy or Download that list. This is your list of samples that have a TP53 mutation.","Now for the negative criteria example. This also begins by using OQL to run a query for KRAS mutations (KRAS: MUT) in your study of interest. Click over to the “Download” tab. Look at the table at the top again, but this time find the row that starts with “Sample matrix”. Copy or download this data and open it in Excel. You will see a two column table that indicates whether a given sample is altered or not, indicated by 0 or 1. Sort by the second column and then copy all the sample IDs from the first column that have a 0 in the second column. This is your list of samples that do not have a KRAS mutation.","With a sample list in hand, you can now either run a query in just the selected samples (select “User-defined Case List” in the “Select Patient/Case Set:” dropdown) or explore this set of patients in study view (click “Select cases by IDs” and then create a Virtual Study restricted to just those samples).","For more information about OQL, see the specification page or view the tutorial slides. For more information about virtual studies, read this FAQ or view the tutorial slides."]},{"i":"how-can-i-compare-two-or-more-subsets-of-samples","l":"How can I compare two or more subsets of samples?","p":["cBioPortal has a suite of analysis tools to enable comparisons between user-defined groups of samples/patients. 
For an overview of this functionality, see our tutorial on group comparison."]},{"i":"is-it-possible-to-determine-if-a-particular-mutation-is-heterozygous-or-homozygous-in-a-sample-when-a-sample-has-2-mutations-in-one-gene-is-it-possible-to-determine-whether-the-mutations-are-in-cis-or-in-trans-with-each-other","l":"Is it possible to determine if a particular mutation is heterozygous or homozygous in a sample? When a sample has 2 mutations in one gene, is it possible to determine whether the mutations are in cis or in trans with each other?","p":["There is currently no way to definitively determine whether a mutation is heterozygous/homozygous or in cis/trans with another mutation. However, you can try to infer the status of mutations by noting the copy number status of the gene and the variant allele frequency of the mutation(s) of interest relative to other mutations in the same sample. The cBioPortal patient/sample view can help you accomplish this.","Specifically in the case of TCGA samples with two mutations in the same gene, you can also obtain access to the aligned sequencing reads from the GDC and check if the mutations are in cis or in trans (if the mutations are close enough to each other)."]},{"i":"how-can-i-query-overunder-expression-of-a-gene","l":"How can I query over/under expression of a gene?","p":["cBioPortal supports Onco Query Language (OQL) which can be used to query over/under expression of a gene. When writing a query, select an mRNA expression profile. By default, samples with expression z-scores >2 or <-2 in any queried genes are considered altered. Alternate cut-offs can be defined using OQL, for example: \"EGFR: EXP>2\" will query for samples with an EGFR expression z-score >2. 
Review the OQL specification page or tutorial slides for more specifics and examples."]},{"i":"how-can-i-compare-outcomes-in-patients-with-high-vs-low-expression-of-a-gene","l":"How can I compare outcomes in patients with high vs low expression of a gene?","p":["To compare outcomes in patients with high vs low expression of a gene (excluding those patients with intermediate levels of expression), we will follow a 2 step process that builds on the approach described above in How can I query/explore a select subset of samples?, utilizing OQL to first identify and then stratify the cases of interest.","First, identify the sample set using OQL. For example, to stratify patients based on expression of EGFR, add an mRNA profile to the query, and write \"EGFR: EXP>2 EXP<-2\" in the gene set box. After running the query, go to the Download tab and copy/download the “Samples affected” list.","Second, return to the homepage and paste the list of sample IDs from the previous step into the “User-defined Case List” in the “Select Patient/Case Set:” dropdown. This query will now only look at samples with high or low expression. To now stratify into high vs low for survival analysis, enter \"EGFR: EXP>2\" in the gene set box (don’t forget to select the same mRNA profile). Run the query and click over to the Survival tab. The “cases with alteration” are patients with high expression of EGFR and the cases without alteration are those with low expression of EGFR.","We use 2 and -2 as example thresholds above, but it is also a good idea to look at the distribution of expression data and select a threshold based on that. Plots tab can be useful for analyzing the expression distribution."]},{"l":"Results View"},{"l":"OncoPrint"},{"i":"what-are-oncoprints","l":"What are OncoPrints?","p":["OncoPrints are compact means of visualizing distinct genomic alterations, including somatic mutations, copy number alterations, and mRNA expression changes across a set of cases. 
They are extremely useful for visualizing gene set and pathway alterations across a set of cases, and for visually identifying trends, such as trends in mutual exclusivity or co-occurrence between gene pairs within a gene set. Individual genes are represented as rows, and individual cases or patients are represented as columns.","image"]},{"i":"can-i-change-the-order-of-genes-in-the-oncoprint","l":"Can I change the order of genes in the OncoPrint?","p":["By default, the order of genes in the OncoPrint will be the same as in your query. You can change the order by (a) clicking on the gene name and dragging it up/down or (b) clicking on the three vertical dots next to the gene name to move the gene up/down."]},{"i":"can-i-visualize-my-own-data-within-an-oncoprint","l":"Can I visualize my own data within an OncoPrint?","p":["Yes, check out the OncoPrinter tool on our Visualize Your Data page."]},{"i":"why-are-some-samples-not-profiled-for-certain-genes","l":"Why are some samples “Not profiled” for certain genes?","p":["Some studies include data from one or more targeted sequencing platforms which do not include all genes. For samples sequenced on these smaller panels, cBioPortal will indicate that a particular gene was not included on the sequencing panel used for that sample. Alteration frequency calculations for each gene also take this information into account. Hover over a sample in OncoPrint to see the gene panel name, and click on that gene panel name to view a list of the genes included on that panel."]},{"l":"Other pages"},{"i":"does-the-mutual-exclusivity-tab-calculate-its-statistics-using-all-samplesalterations-or-only-a-specific-subset","l":"Does the Mutual Exclusivity tab calculate its statistics using all samples/alterations or only a specific subset?","p":["The calculations on the Mutual Exclusivity tab are performed using all samples included in the query. 
A sample is defined as altered or unaltered for each gene based on the OQL utilized in the query - by default, this will be non-synonymous mutations, fusions, amplifications and deep deletions."]},{"i":"what-are-the-values-of-the-box-and-whiskers-in-a-boxplot","l":"What are the values of the box and whiskers in a boxplot?","p":["In boxplots on cBioPortal, the box is drawn from the 25th percentile (Q1) to the 75th percentile (Q3), with the horizontal line in between representing the median. Whiskers are drawn independently above and below the box, and will extend to the maximum or minimum data values, unless there are outlier values, in which case the whisker will extend to 1.5 * IQR (interquartile range = Q3-Q1). Outliers are defined as values that extend beyond 1.5 * IQR."]},{"l":"Study View"},{"i":"how-to-use-filter-in-the-url-of-study-view-page","l":"How to use filter in the URL of Study View page?","p":["You can filter the study based on values of one attribute in the URL. For example, https://www.cbioportal.org/study/summary?id=msk_impact_2017#filterJson={clinicalDataFilters:[{attributeId:CANCER_TYPE,values:[{value:Melanoma}]}]}","filterJson is set in the url hash string. Here are the allowed parameters and format for it in filterJson:"]},{"i":"what-if-i-have-other-questions-or-comments","l":"What if I have other questions or comments?","p":["Please contact us at cbioportal@googlegroups.com. 
Previous discussions about cBioPortal are available on the user discussion mailing list."]}],[{"l":"Overview"},{"l":"Overview of Resources"},{"l":"Tutorial Slides","p":["These tutorial slides contain annoted screenshots to walk you through using the cBioPortal site.","Single Study Exploration Google slides| PDF","Single Study Query Google slides| PDF","Patient View Google slides| PDF","Virtual Studies Google slides| PDF","Onco Query Language (OQL) Google slides| PDF","Group Comparison Google slides| PDF","Pathways Google slides| PDF"]},{"l":"Webinar Recordings","p":["Recordings of live webinars from April & May 2020","Introduction to cBioPortal youtube.com| bilibili.com | Download PDF | View slides","Mutation Details & Patient View youtube.com| bilibili.com | Download PDF | View slides","Expression Data Analysis youtube.com| bilibili.com | Download PDF | View slides","Group Comparison youtube.com| bilibili.com | Download PDF | View slides","API & R Client youtube.com| bilibili.com | Download PDF | View slides | Workshop code"]},{"l":"How-To Videos","p":["Short videos that show how to perform specific analyses or how to use specific pages.","Comparing samples based on expression level of a gene youtube.com","Proteomic profiles in cBioPortal - An example based on cancer cell lines from the Cancer Cell Line Encyclopedia (CCLE) youtube.com","Filtering and adding clinical data to Mutations tab youtube.com","Exploring the longitudinal evolution of individual patients youtube.com","Using Onco Query Language (OQL) to query based on the expression level of genes youtube.com","How to explore the data in a study youtube.com","How to run a query for genes of interest youtube.com","How to download data youtube.com","Navigating AACR GENIE - Biopharma Collaborative (BPC) dataset youtube.com"]},{"l":"Documentation","p":["Frequently Asked Questions FAQ","Onco Query Language OQL"]},{"l":"Publications","p":["Cerami et al. Cancer Discovery 2012 PubMed","Gao et al. 
Science Signaling 2013 PubMed"]},{"l":"Tutorials by others","p":["cBioPortal Tutorial Series by Jackson Laboratory youtube.com","Using the Cancer Digital Slide Archive in cBioPortal by Nicole M. Rivera Acevedo youtube.com (English)| youtube.com (Spanish)","Visualizing and Downloading RNASeq data from cBioPortal by Farhan Haq youtube.com"]}],[{"l":"By page"},{"l":"Resources by Page"},{"l":"Study View","p":["Tutorial Slides: Single Study Exploration Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","How-To Video: Comparing samples based on expression level of a gene youtube.com","Tutorial Slides: Virtual Studies Google slides| PDF"]},{"l":"Group Comparison","p":["Tutorial Slides: Group Comparison Google slides| PDF","Webinar: Group Comparison youtube.com| bilibili.com","How-To Video: Comparing samples based on expression level of a gene youtube.com"]},{"i":"running-a-query--results-view","l":"Running a Query / Results View"},{"l":"General","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"OQL","p":["Documentation OQL","Tutorial Slides: Onco Query Language (OQL) Google slides| PDF","Webinar: Expression Data Analysis youtube.com| bilibili.com","How-To Video: Using Onco Query Language (OQL) to query based on the expression level of genes youtube.com"]},{"l":"OncoPrint","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Cancer Types Summary","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Mutual Exclusivity","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Plots","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| 
bilibili.com"]},{"l":"Mutations","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Webinar: Mutation Details & Patient View youtube.com| bilibili.com","How-To Video: Filtering and adding clinical data to Mutations tab youtube.com"]},{"l":"Co-expression","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"i":"comparisonsurvival","l":"Comparison/Survival","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Tutorial Slides: Group Comparison Google slides| PDF","Webinar: Group Comparison youtube.com| bilibili.com"]},{"l":"CN Segments","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Pathways","p":["Tutorial Slides: Pathways Google slides| PDF","Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Downloads","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Patient View","p":["Tutorial Slides: Patient View Google slides| PDF","Tutorial Slides: Pathways Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Webinar: Mutation Details & Patient View youtube.com| bilibili.com","How-To Video: Exploring the longitudinal evolution of individual patients youtube.com"]}],[{"i":"onco-query-language-oql","l":"Onco Query Language (OQL)"},{"l":"Introduction to OQL","p":["The Onco Query Language (OQL) is used to define which specific types of alterations are included in a query on the cBioPortal for Cancer Genomics. By default, querying for a gene includes mutations, fusions, amplifications and deep deletions. OQL can be used to specify specific mutations (e.g. 
BRAF V600E) or types of mutations (e.g. BRCA1 truncating mutations), lower level copy number alterations (e.g. CDKN2A shallow deletions), changes in mRNA or protein expression, and more.","OQL-specified alterations will be reflected on most tabs, including OncoPrint, but are not currently reflected on the Plots, Co-Expression or Expression tabs.","Note that OQL assumes any word that it doesn't recognize is a mutation code.","Additional explanation and examples using OQL are available in the User Guide."]},{"l":"OQL Keywords","p":["* These are the default OQL keywords used for each data type when a gene is queried without any explicit OQL.","AMP Amplifications HOMDEL Deep Deletions GAIN Gains HETLOSS Shallow Deletions Comparison operators can also be used with CNA(e.g. CNA = GAIN is the same as AMP GAIN)","AMP HOMDEL","Copy Number Alterations","Data Type","Default*","EXP -x mRNA expression is less than x standard deviations (SD) below the mean EXP x mRNA expression is greater than x SD above the mean The comparison operators = and = also work","EXP = 2 EXP = -2","FUSION","FUSION All fusions (note that many studies lack fusion data)","Fusions","Keywords and Syntax","mRNA Expression","MUT","MUT All non-synonymous mutations MUT = protein change Specific amino acid changes (e.g. 
V600E or V600) MUT = mutation type Acceptable values are: MISSENSE, NONSENSE, NONSTART, NONSTOP, FRAMESHIFT, INFRAME, SPLICE, TRUNC","Mutations","PROT -x Protein expression is less than x standard deviations (SD) below the mean PROT x Protein expression is greater than x SD above the mean The comparison operators = and = also work","PROT = 2 PROT = -2","Protein/phosphoprotein level","Users can define specific subsets of genetic alterations for five data types:"]},{"l":"OQL modifiers","p":["Mutations and copy number alterations can be further refined using modifiers:","Keyword","Applicable Data Type","Explanation","DRIVER","Mutations Fusions Copy Number Alterations","Include only mutations, fusions and copy number alterations which are driver events, as defined in OncoPrint (default: OncoKB and CancerHotspots).","GERMLINE","Mutations","Include only mutations that are defined as germline events by the study.","SOMATIC","Include all mutations that are not defined as germline.","(a-b)(protein position range)","Include all mutations that overlap with the protein position range a-b, where a and b are integers. If you add a *(i.e. (a-b*)) then it will only include those mutations that are fully contained inside a-b. The open-ended ranges (a-) and (-b) are also allowed."]},{"l":"Basic Usage","p":["When querying a gene without providing any OQL specifications, cBioPortal will default to these OQL terms for a query with Mutation and Copy Number selected in the Genomic Profiles section: MUT FUSION AMP HOMDEL","image of basic query","You can see the OQL terms applied by hovering over the gene name in OncoPrint:","image of basic query oncoprint","If you select RNA and/or Protein in the \"Genomic Profiles\" section of the query, the default settings are:","RNA: EXP = 2 EXP = -2","Protein: PROT = 2 PROT = -2","image of exp prot query oncoprint","You must select the relevant Genomic Profile in order for OQL to query that data type. 
For example, you can't add EXP 2 to the query without also selecting an RNA profile.","Proper formatting for OQL is straightforward: gene name, followed by a colon, followed by any OQL keywords and ending in a semicolon, an end-of-line, or both.","In general, any combination of OQL keywords and/or expressions can annotate any gene, and the order of the keywords is immaterial.","Below we will go into greater detail about each data type."]},{"l":"Mutations","p":["For example, to view TP53 truncating mutations and in-frame insertions/deletions:","FRAMESHIFT","INFRAME","MISSENSE","mutation type can be one or more of:","NONSENSE","NONSTART","NONSTOP","Note that this will only work to exclude a single event. Because OQL uses 'OR' logic, excluding multiple mutations or excluding a mutation while including another mutation (e.g. BRAF: MUT=V600 MUT!=V600E) will result in querying all mutations.","OQL can also be used to exclude a specific protein change, position or type of mutation. For example, below are examples to query all EGFR mutations except T790M, all BRAF mutations except those at V600 and all TP53 mutations except missense:","OQL for mutations can also be written without MUT =. The following examples are identical:","Or all mutations of a specific type:","SPLICE","To view cases with specific mutations, provide the specific amino acid change of interest:","TRUNC","You can also view all mutations at a particular position:"]},{"l":"Copy Number Alterations","p":["To view cases with specific copy number alterations, provide the appropriate keywords for the copy number alterations of interest. For example, to see amplifications:","Or amplified and gained cases:","Which can also be written as:"]},{"l":"Expression","p":["High or low mRNA expression of a gene is determined by the number of standard deviations (SD) from the mean. 
For example, to see cases where mRNA for CCNE1 is greater than 3 SD above the mean:"]},{"l":"Protein","p":["High or low protein expression is similarly determined by the number of SD from the mean. For example, to see cases where protein expression is 2 SD above the mean:","Protein expression can also be queried at the phospho-protein level:"]},{"l":"Modifiers","p":["Modifiers can be used on their own or in combination with other OQL terms for mutations, fusions and copy number alterations to further refine the query. Modifiers can be combined with other OQL terms using an underscore. The order in which terms are combined is immaterial."]},{"l":"Driver","p":["The DRIVER modifier applies to mutations, fusions and copy number alterations. The definition of what qualifies as a driver alteration comes from the \"Mutation Color\" menu in OncoPrint. By default, drivers are defined as mutations, fusions and copy number alterations in OncoKB or CancerHotspots.","On its own, the DRIVER modifier includes driver mutations, fusions and copy number alterations:","Or it can be used in combination with another OQL term. For example, to see only driver fusion events:","Or driver missense mutations:","When combining DRIVER with another OQL term, the order doesn't matter: MUT_DRIVER and DRIVER_MUT are equivalent. DRIVER can be combined with:","MUT","MUT = mutation type or MUT = protein change","FUSION","CNA","AMP or GAIN or HETLOSS or HOMDEL","GERMLINE or SOMATIC(see below)"]},{"i":"germlinesomatic","l":"Germline/Somatic","p":["The GERMLINE and SOMATIC modifiers only apply to mutations. A mutation can be explicitly defined as germline during the data curation process. 
Note that very few studies on the public cBioPortal contain germline data.","GERMLINE or SOMATIC can be combined with:","MUT","MUT = mutation type or MUT = protein change","DRIVER","To see all germline BRCA1 mutations:","Or to see specifically truncating germline mutations:","The order is immaterial; both options produce identical results.","Or to see somatic missense mutations:","GERMLINE or SOMATIC can also be combined with DRIVER and, optionally, a more specific mutation term (e.g. NONSENSE):"]},{"l":"The DATATYPES Command","p":["To save copying and pasting, the DATATYPES command sets the genetic annotation for all subsequent genes. Thus,","is equivalent to:"]},{"l":"Merged Gene Tracks","p":["OQL can be used to create a merged gene track in OncoPrint, in which alterations in multiple genes appear as a single track. This is done by enclosing a list of genes in square brackets. By default, the track will be labeled by the gene names, separated by '/'. To instead specify a label, type the desired label within double quotes at the beginning of the square brackets. For example:","The resulting merged gene track will be visible in OncoPrint and can be expanded to view the individual gene tracks. For example:","Image of merged genes in OncoPrint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966e2e4b05228701f958e","It is possible to include OQL for specific alterations in merged gene tracks, as well as querying a combination of single and merged gene tracks.","Note that merged gene tracks only appear in OncoPrint. All other pages show the individual genes."]},{"i":"example-rb-pathway-alterations","l":"Example: RB Pathway Alterations","p":["Provided below is one example of the power of using OQL. 
Additional examples are available in the User Guide."]},{"l":"Using the Defaults","p":["Select Ovarian Serous Cystadenocarcinoma (TCGA, Nature 2011) with the following data types:","Mutations","Putative copy-number alterations (GISTIC)","mRNA expression (mRNA expression Z-scores (all genes))","Input the following three genes in the RB pathway:","CCNE1","RB1","CDKN2A","image of rb query","Submit this query and note how many samples have alterations in multiple of these genes:","image of rb oncoprint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966cee4b05228701f958d"]},{"l":"Greater Insight with OQL","p":["Given what is known about the RB pathway, the events that are most likely selected for in the tumors are CCNE1 amplification, RB1 deletions or mutations, and loss of expression of CDKN2A. To investigate this hypothesis, we can use OQL to display only these events. Modify the query to reflect this:","Examine the updated OncoPrint:","image of modified rb oncoprint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966aee4b05228701f958c","This shows that alterations in these genes are almost entirely mutually-exclusive -- no cases are altered in all three genes and only six are altered in two genes. This supports the theory that the tumor has selected for these events."]},{"i":"questions-feedback","l":"Questions? 
Feedback?","p":["Please share any questions or feedback on OQL with us: https://groups.google.com/group/cbioportal","Also note that additional explanation and examples using OQL are available in the User Guide."]}],[{"l":"News"},{"i":"aug-21-2023","l":"Aug 21, 2023","p":["Added data consisting of 4,488 samples from 7 studies:","Lung Adenocarcinoma Met Organotropism (MSK, Cancer Cell 2023) 2653 samples","Acute Myeloid Leukemia (OHSU, Cancer Cell 2022) 942 samples","Colon Cancer (Sidra-LUMC AC-ICAM, Nat Med 2023) 348 samples","Pediatric Neuroblastoma (MSK, Nat Genet 2023) 223 samples","Colorectal Adenocarcinoma (MSK, Nat Commun 2022) 180 samples","Bladder Cancer (Columbia University/MSK, Cell 2018) 130 samples","Myoepithelial Carcinomas of Soft Tissue (WCM, CSH Molecular Case Studies 2022) 12 samples","Gene Tables Update Updated tables of genes (main and alias), based on Apr 1, 2023 HGNC release. See seedDB release note here for details."]},{"i":"aug-1-2023","l":"Aug 1, 2023","p":["Enhancement: One-sided Fisher's exact tests were changed to be two-sided. The affected pages are:","Results View Page - Mutual Exclusivity Tab","Results View Page - Comparison Tab - Genomic Alterations Tab","Comparison Page - Genomic Alterations Tab","Comparison Page - Mutations Tab","Please note that the Mutations tab on the Comparison page is a recent feature and was introduced with the two-sided Fisher's exact test already implemented.","Several users pointed out that using a one-sided test was incorrect for these comparisons. Please see discussions here for more information."]},{"i":"may-2-2023","l":"May 2, 2023","p":["New Feature: The mutations tab now shows variant annotations from the repository of Variant with Unexpected Effects (reVUE)."]},{"i":"apr-11-2023","l":"Apr 11, 2023","p":["New Feature: Disable autocommit and manually commit filters in study view. 
Manually commit filters can improve cBioPortal performance when query large dataset."]},{"i":"apr-5-2023","l":"Apr 5, 2023","p":["Added data consisting of 2,472 samples from 5 studies:","Bladder Cancer (MSK, Cell Reports 2022) 1659 samples","Gastrointestinal Stromal Tumor (MSK, NPJ Precis Oncol 2023) 499 samples","Appendiceal Cancer (MSK, J Clin Oncol 2022) 273 samples","Colorectal Cancer (MSK, Cancer Discovery 2022) 22 samples","Nerve Sheath Tumors (Johns Hopkins, Sci Data 2020) 19 samples[First GRCh38 Study]","Data Improvement","Added TERT promoter mutation status to Melanomas (TCGA, Cell 2015), Papillary Thyroid Carcinoma (TCGA, Cell 2014) TCGA studies."]},{"i":"apr-4-2023","l":"Apr 4, 2023","p":["New Feature: Allow numeric data type for custom data charts.","This also allows to have numerical custom data after we query based on genes (custom data 2 in the image):"]},{"i":"jan-10-2023","l":"Jan 10, 2023","p":["New Feature: New Pathways tab on the Group Comparison view. Example: Primary vs Metastasis samples in MSK-IMPACT Clinical Sequencing Cohort"]},{"i":"dec-13-2022","l":"Dec 13, 2022","p":["New Feature: New Mutations tab on the Group Comparison view. Example: Primary vs Metastasis samples in MSK-IMPACT Clinical Sequencing Cohort"]},{"i":"oct-12-2022","l":"Oct 12, 2022","p":["Added data consisting of 1,459 samples from 10 studies:","Hepatocellular Carcinoma (MERiC/Basel, Nat Commun. 2022) 122 samples","Prostate Cancer Brain Metastases (Bern, Nat Commun. 2022) 168 samples","Pan-Cancer MSK-IMPACT MET Validation Cohort (MSK 2022) 69 samples","Endometrial Carcinoma cfDNA (MSK, Clin Cancer Res 2022) 44 samples","Endometrial Carcinoma MSI (MSK, Clin Cancer Res 2022) 181 samples","Gallbladder Cancer (MSK, Clin Cancer Res, 2022) 244 samples","Meningioma (University of Toronto, Nature 2021) 121 samples","Mixed Tumors: Selpercatinib RET Trial (MSK, Nat Commun. 
2022) 188 samples","Low-Grade Serous Ovarian Cancer (MSK, Clin Cancer Res 2022) 119 samples","Urothelial Carcinoma (BCAN/HCRN 2022) 203 samples"]},{"i":"sep-6-2022","l":"Sep 6, 2022","p":["Enhancement: Oncoprint can now save clinical tracks after login"]},{"i":"aug-11-2022","l":"Aug 11, 2022","p":["New Major Release: v5.0.0 release drops support for fusions in the mutation data format. Going forward fusions can only be imported in the Structural Variant (SV) format. This is mainly a refactoring effort to simplify the codebase and pave the way for the development of novel structural variant visualizations in the future. For cBioPortal instance maintainer, please reference our Migration Guide for instruction."]},{"i":"jul-26-2022","l":"Jul 26, 2022","p":["Added data consisting of 6,631 samples from 7 studies:","Metastatic Biliary Tract Cancers (SUMMIT - Neratinib Basket Trial, 2022) 36 samples","Rectal Cancer (MSK, Nature Medicine 2022) 801 samples","Lung Adenocarcinoma (MSK Mind,Nature Cancer 2022) 247 samples","Myelodysplastic Syndromes (MDS IWG, IPSSM, NEJM Evidence 2022) 3,323 samples","Esophagogastric Cancer (MSK, Clin Cancer Res 2022) 237 samples","Pan-cancer Analysis of Advanced and Metastatic Tumors (BCGSC, Nature Cancer 2020) 570 samples","Prostate Adenocarcinoma (MSK, Clin Cancer Res. 2022) 1,417 samples"]},{"i":"may-31-2022","l":"May 31, 2022","p":["New Feature: Added Quartiles, Median split and Generate bins options for bar charts on the study view page, where Generate bins allows user to define bin size and min value"]},{"i":"may-12-2022","l":"May 12, 2022","p":["New Feature: Show cohort alteration frequencies in pathways from NDEx on the Results View. Example: Glioblastoma signaling pathways in MSK-IMPACT (2017) cohort"]},{"i":"may-5-2022","l":"May 5, 2022","p":["New Feature: View mutations and copy number changes in the Integrative Genomics Viewer (IGV) on the Patient View. 
Example: Endometrial cancer patient in TCGA","New Feature: Add charts that plot categorical vs continuous data on the Study View. Example: MSK-IMPACT (2017) cohort","New Feature: Several single cell data integrations are now available for the CPTAC glioblastoma study, allowing one to:","Compare genomic alterations and cell type fractions in oncoprints on the Results View( Example)","Explore the single cell data further in Vitessce on the Patient View( Example)","Create cohorts and groups based on cell type fractions on the Study View( Example)","Compare differences in cell type fractions between groups on the Comparison Page( Example)"]},{"i":"apr-20-2022","l":"Apr 20, 2022","p":["Added data consisting of 2,557 samples from 5 studies:","Breast Cancer (HTAN, 2022) 5 samples","Colorectal Cancer (MSK, 2022) 47 samples","Pediatric Pancan Tumors (MSK, 2022) 135 samples","Sarcoma (MSK, 2022) 2,138 samples","Lung Cancer in Never Smokers (NCI, Nature Genetics 2021) 232 samples","Gene Tables Update Updated tables of genes (main and alias), based on Jan 1, 2022 HGNC release. See seedDB release note here for details.","Data Improvement","Pan-can studies timeline addition: TREATMENT, OTHER MALIGNANCY FORM, SAMPLE ACQUISITION, STATUS are added to all 32 TCGA Pan-Can studies. Details for data source and transformation process can be found here or in the README.md files included in each study folder on datahub. Example: patient view of TCGA-A2-A04P in Breast Invasive Carcinoma Tumor Type","Pan-can studies methylation addition: methylation profile (27k and 450k merged) are added to all 32 TCGA Pan-Can studies, in generic assay format. Data source: GDC. 
Example: search by gene or probe from dropdown, to add a chart in study view, a track in Oncoprint (single study query only), or plots in plots tab.","Single cell (type fraction and phases) data (in generic assay format) is added to Glioblastoma (CPTAC, Cell 2021)"]},{"i":"jan-4-2022","l":"Jan 4, 2022","p":["Added data consisting of 27,447 samples from 10 studies:","Endometrial Carcinoma (CPTAC, Cell 2020) 95 samples","Pancreatic Ductal Adenocarcinoma (CPTAC, Cell 2021) 140 samples","Lung Squamous Cell Carcinoma (CPTAC, Cell 2021) 108 samples","Lung Adenocarcinoma (CPTAC, Cell 2020) 110 samples","Glioblastoma (CPTAC, Cell 2021) 99 samples","Breast Cancer (CPTAC, Cell 2020) 122 samples","Pediatric Brain Cancer (CPTAC/CHOP, Cell 2020) 218 samples","Metastatic Prostate Cancer (Provisional, June 2021) 123 samples","MSK MetTropism (MSK, Cell 2021) 25,775 samples","Cancer Therapy and Clonal Hematopoiesis (MSK, 2021) 657 samples","Added TMB (nonsynonymous) scores for all studies. Example: new TMB field for study gbm_cptac_2021(Details for the calculation can be found HERE)"]},{"i":"nov-12-2021","l":"Nov 12, 2021","p":["Added data consisting of 3,680 samples from 6 studies:","Breast Cancer MAPK (MSKCC, Nat Commun 2021) 145 samples","Colorectal Cancer (MSK, 2020) 64 samples","Breast Cancer (MSK, Clinical Cancer Res 2020) 60 samples","High-Grade Serous Ovarian Cancer (MSK, 2021) 45 samples","Diffuse Glioma (GLASS Consortium, Nature 2019) 444 samples","Pan-cancer analysis of whole genomes (ICGC/TCGA, Nature 2020) 2,922 samples"]},{"i":"nov-32021","l":"Nov 3,2021","p":["New Feature: Add Uniprot topology as a new annotation track on the Mutations Tab of the Results View. Example: EGFR in MSK-IMPACT (2017) cohort"]},{"i":"oct-1-2021","l":"Oct 1, 2021","p":["New Feature: Arm level Copy Number events are now loaded into cBioPortal using the Categorial Generic Assay Data Type. 
They can be found in a tab under the Add Charts Button of the Study View Example: Arm Level Data in TCGA PanCancer Atlas"]},{"i":"sep-22-2021","l":"Sep 22, 2021","p":["Added data consisting of 14,844 samples from 7 studies:","Colorectal Cancer (MSK, Gastroenterology 2020) 471 samples","Metastatic Breast Cancer (MSK, Cancer Discovery 2021) 1,365 samples","Lung Adenocarcinoma (MSKCC, 2021) 186 samples","Race Differences in Prostate Cancer (MSK, 2021) 2,069 samples","Medulloblastoma (DKFZ, Nature 2017) 491 samples","Thoracic Cancer (MSK, 2021) 68 samples","China Pan-cancer (OrigiMed, 2020) 10,194 samples"]},{"i":"sep-21-2021","l":"Sep 21, 2021","p":["Enhancement: Dowloading the Lollipop plot on the Mutations Tab of the Results View will now also include the annotation tracks:"]},{"i":"aug-17-2021","l":"Aug 17, 2021","p":["New Feature: The Mutations Tab of the Results View can now show exon numbers as an annotation track Example: MET Exon 14 Mutations in MSK-IMPACT (2017) cohort"]},{"i":"aug-10-2021","l":"Aug 10, 2021","p":["New Feature: Use the filtering capabilities in the Mutations Tab of the Results View to create a custom cohort that one can open directly in the Study View Example: CTNNB1 in MSK-IMPACT (2017) cohort"]},{"i":"jul-27-2021","l":"Jul 27, 2021","p":["New Feature: Add a custom filter to any column of the Mutations Tab in the Results View Example: CTNNB1 in MSK-IMPACT (2017) cohort","New Feature: Show detailed descriptions for each annotation source in the header of the the Mutations Table in both the Results View and the Patient View Example link"]},{"i":"jul-6-2021","l":"Jul 6, 2021","p":["New Feature: Add any clinical data as a column on the Mutations Tab in the Results View Example: EGFR in MSK-IMPACT (2017) cohort"]},{"i":"june-23-2021","l":"June 23, 2021","p":["Added data consisting of 1,084 samples from 5 studies:","Intrahepatic Cholangiocarcinoma (MSK, Hepatology 2021) 412 samples","Intrahepatic Cholangiocarcinoma (Mount Sinai 2015) 8 
samples","RAD51B Associated Mixed Cancers (Mandelker 2021 19 samples","Intrahepatic Cholangiocarcinoma (MSK, 2020) 219 samples","Lung Adenocarcinoma (NPJ Precision Oncology, MSK 2021) 426 samples","Added mass-spec proteome data from CPTAC to Breast Invasive Carcinoma (TCGA, PanCancer Atlas), Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas) and Colorectal Adenocarcinoma (TCGA, PanCancer Atlas).","Added mass-spec phosphoproteome site level expression from CPTAC to Breast Invasive Carcinoma (TCGA, PanCancer Atlas) and Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas).","Updated gene tables Updated tables of genes (main and alias), based on HGNC. See details HERE in section Contents of seed database. Sripts/resources/process used to construct new tables are described HERE."]},{"i":"june-1-2021","l":"June 1, 2021","p":["New Feature: In certain studies where we have the data we show read counts for uncalled mutations on the Patient View Example: A patient in the Glioma (MSK, 2019) cohort"]},{"i":"may-10-2021","l":"May 10, 2021","p":["New Feature: Pick color for User Defined Groups Example: Color Bladder Cancer Group in MSK-IMPACT (2017) cohort, implemented by The Hyve."]},{"i":"may-4-2021","l":"May 4, 2021","p":["New Feature: Add more categories of mutations to the Mutations Tab on the Results View, including Driver/VUS, Splice and Structural Variants Example: TP53 alterations in the MSK-IMPACT (2017) cohort"]},{"i":"april-21-2021","l":"April 21, 2021","p":["Added data consisting of 4074 samples from 9 studies:","Metaplastic Breast Cancer (MSK, 2021) 19 samples","Lung Adenocarcinoma (MSKCC, 2020) 604 samples","Cutaneous Squamous Cell Carcinoma (UCSF, 2021) 105 samples","MSK-IMPACT and MSK-ACCESS Mixed Cohort (MSK, 2021) 1446 samples","Melanoma (MSKCC, 2018) 720 samples","Cholangiocarcinoma (ICGC, Cancer Discov 2017) 489 samples","Esophageal/Stomach Cancer (MSK, 2020) 487 samples","Retinoblastoma (MSK, Cancers 2021) 83 samples","Combined Hepatocellular 
and Intrahepatic Cholangiocarcinoma (Peking University, Cancer Cell 2019) 121 samples"]},{"i":"april-20-2021","l":"April 20, 2021","p":["New Feature: Add driver annotations to download tab on Results View Example: RAS/RAF alterations in colorectal cancer"]},{"i":"march-30-2021","l":"March 30, 2021","p":["Enhancement: Add 95% Confidence Interval for Survival Plots Example: Altered vs Unaltered EGFR in Lung Cancer"]},{"i":"march-11-2021","l":"March 11, 2021","p":["New Feature: Combine different types of alterations in Comparison View Example: Deletions and Truncating events in primary vs metastases or read more on The Hyve's blog","Enhancement: Improve UI for OncoPrint, aggregating various data modalities in a single add track dropdown button Example: Add clinical, heatmap and treatment response data into the OncoPrint"]},{"i":"february-16-2021","l":"February 16, 2021","p":["Enhancement: Show only TCGA PanCancer Atlas Pathways in Results and Patient View to avoid showing many similar pathways Example: Clinvar APC and CTNNB1 alterations in WNT pathway"]},{"i":"january-28-2021","l":"January 28, 2021","p":["New Feature: Show ClinVar Interpretation in Mutation tables Example: Clinvar Interpretations in BRCA2"]},{"i":"january-12-2021","l":"January 12, 2021","p":["New Feature: Add your own custom data for a sample or patient to use on the study or comparison view Example: Add custom data to three samples and do a comparison","New Feature: Show the mutations of a patient inside a pathway schematic using PathwayMapper Example: Notch signaling pathway in a prostate cancer patient","New Feature: Display and compare generic assays, such as microbiome and treatment response, on the study view Example: Prasinovirus microbiome signatures in TCGA","New Feature: The Plots tab on Results View now allows you to group alterations by Driver and VUS Example: POLE driver mutations vs VUSs against mutation counts in TCGA Colorectal Adenocarcinoma"]},{"i":"december-31-2020","l":"December 
31, 2020","p":["Added data consisting of 430 samples from 5 studies:","Juvenile Papillomatosis and Breast Cancer (MSK, 2020) 5 samples","Mixed cfDNA (MSKCC, 2020) 229 samples","Metastatic Melanoma (DFCI, Nature Medicine 2019) 144 samples","Lung Cancer (SMC, Cancer Research 2016) 22 samples","Upper Tract Urothelial Carcinoma (IGBMC, Genome Biology 2021) 30 samples","Added survival data to Breast Cancer (METABRIC, Nature 2012 & Nat Commun 2016)"]},{"i":"november-3-2020","l":"November 3, 2020","p":["New Feature: The map of local installations of cBioPortal is available now. Please consider registering your instance here. image","Enhancement: upgraded the Genomic Evolution tab in Patient View with timeline Example image"]},{"i":"october-20-2020","l":"October 20, 2020","p":["Enhancement: Expression tab has now been merged into the Plots tab image"]},{"i":"october-16-2020","l":"October 16, 2020","p":["Added data consisting of 25,078 samples from 5 studies:","Melanomas (TCGA, Cell 2015) 359 samples","Retinoblastoma cfDNA (MSKCC 2020) 14 samples","The Angiosarcoma Project (Provisional, July 2020) 83 samples","Bladder Cancer (MSK/TCGA, 2020) 476 samples","Cancer Therapy and Clonal Hematopoiesis (MSK, 2020) 24,146 samples","Added MSI data(MSIsensor from Mariamidze et al. 2018 and MANTIS scores from Roychowdhury et al. 2017) for all 32 TCGA PanCan Atlas Cohorts.","Added new profile“RNA-Seq V2 expression Z-scores relative to normal samples” for 16 TCGA PanCan Atlas Cohorts. The normals samples RNA-Seq V2 expression data were curated from GDC, and can be downloaded from our Datahub or Data Set page. Example: ERBB2 expression z-scores relative to normal expression","image"]},{"i":"october-13-2020","l":"October 13, 2020","p":["Enhancement: Study View now allows comparing samples with mutations or copy number alterations in different genes image","New Feature: When treatment timeline is available (e.g. 
in this study), Study View now allows the selection and comparison of patients treated with specific drugs, or samples sequenced pre or post specific drug treatments image"]},{"i":"september-30-2020","l":"September 30, 2020","p":["New Feature: Microbiome signature data is available for comparison now. Example: comparing colorectal subtypes for enriched microbiome signatures image"]},{"i":"september-22-2020","l":"September 22, 2020","p":["Enhancement: The timeline feature in Patient View has been refactored with an improved UI. Example image","Enhancement: Logrank p-values are now provided for all survival analysis (previously only availalbe when comparing two groups). Example"]},{"i":"august-11-2020","l":"August 11, 2020","p":["New Feature: microbiome data of TCGA samples from Poore et al. 2020 are now available for analysis in the OncoPrint and Plots tabs. Example: Orthohepadnavirus across TCGA cancers image","New Feature: You can now compare DNA Methylation data between groups using the Comparison feature. Example: Comparing DNA methylation levels between samples with high vs low BRCA1 expression image","Added data consisting of 513 samples from 3 studies:","Breast Cancer (SMC 2018) 187 samples","Germ Cell Tumors and Shared Leukemias (MSK 2020) 21 samples","Lung Adenocarcinoma (OncoSG, Nat Genet 2020) 305 samples","Added RPPA data in addition to the microbiome data for 31 TCGA Pancan studies (except LAML)"]},{"i":"july-21-2020","l":"July 21, 2020","p":["New Feature: The Mutations tab now has the option to show mutation effects for different transcripts / isoforms. Note that some annotation features are only available for the canonical isoform. example image","Enhancement: The Plots tab is now supported in multi-study queries. 
example image","New Feature: You can now share custom groups in the Study View example"]},{"i":"june-11-2020","l":"June 11, 2020","p":["Added data consisting of 267 samples from 2 studies:","Gastric Cancer (OncoSG, 2018) 147 samples","120 ctDNA samples added to Non-Small Cell Lung Cancer (TRACERx, NEJM & Nature 2017) 447 samples"]},{"i":"june-9-2020","l":"June 9, 2020","p":["Enhancement: using OQL to query for mutations based on a protein position range. example image","New Feature: you can now send the OncoPrint data to the OncoPrinter tool for customization. image","Enhancement: Mutational spectrum data can be downloaded from OncoPrint image"]},{"i":"june-2-2020","l":"June 2, 2020","p":["Enhancement: Pediatric cancer studies are now grouped and highlighted in the query page image"]},{"i":"may-6-2020","l":"May 6, 2020","p":["Added data consisting of 574 samples from 3 studies:","Uterine Sarcoma/Mesenchymal (MSK, Clin Cancer Res 2020) 108 samples","Metastatic castration-sensitive prostate cancer (MSK, Clin Cancer Res 2020) 424 samples","Glioblastoma (Columbia, Nat Med. 2019) 42 samples","Updated one study:","Expression data was added to The Metastatic Breast Cancer Project (Provisional, February 2020)."]},{"i":"april-24-2020","l":"April 24, 2020","p":["New Feature: Add a new chart on the Study View for selecting samples based on pre-defined case lists:"]},{"i":"april-10-2020","l":"April 10, 2020","p":["New Feature: Make cohorts on the Study View using continuous molecular profiles of one or more gene(s), such as mRNA expression, methylation, RPPA and continuous CNA. example","Combine this with the group comparison feature to compare e.g. all quartiles of expression:","New Feature: Annotate mutations using the Mutation Mapper Tool on the GRCh38 reference genome:","mutation_mapper_tool_grch38"]},{"i":"april-3-2020","l":"April 3, 2020","p":["New Feature: Extended the Comparison tab to support the comparison of altered samples per gene or alteration. 
This example query compares NSCLC patients with 1) both mutated and amplified EGFR, 2) mutated EGFR only, and 3) amplified EGFR only.","image"]},{"i":"march-27-2020","l":"March 27, 2020","p":["Enhancement: User selections in the Plots tab are now saved in the URL. example","New Feature: Added table of data availability per profile in the Study View. example"]},{"i":"march-20-2020","l":"March 20, 2020","p":["Enhancement: Extended Survival Analysis to support more outcome measures. example","image"]},{"i":"march-18-2020","l":"March 18, 2020","p":["Added data consisting of 1,393 samples from 3 studies:","Breast Cancer (Alpelisib plus AI, Nature Cancer 2020) 141 samples","Glioma (MSKCC, Clin Cancer Res 2019) 1,004 samples","Mixed cfDNA (MSK, Nature Medicine 2019) 248 samples"]},{"i":"march-3-2020","l":"March 3, 2020","p":["New Feature: Added Pathways tab to the Results View page, which visualizes the alteration frequencies of genes in pathways of interest. The pathways are pulled from https://www.pathwaymapper.org and shown in a read only view. One can edit these pathways in the PathwayMapper editor. For more information see the tutorial.","pathwaymapper_screenshot"]},{"i":"february-12-2020","l":"February 12, 2020","p":["Added data consisting of 1,605 samples from 3 studies:","Tumors with TRK fusions (MSK, 2019) 106 samples","Lymphoma Cell Lines (MSKCC, 2020) 34 samples","Prostate Adenocarcinoma (MSKCC, 2020) 1,465 samples"]},{"i":"february-6-2020","l":"February 6, 2020","p":["New Feature: Extend the recent group comparison feature by allowing comparisons inside the Results View page. The new tab allows for quick comparison of altered vs unaltered cases by survival, clinical information, mutation, copy number events and mRNA expression:","group_results640px","Performance enhancement: the Study View's mutation table now loads faster for studies with multiple gene panels. 
For the genie portal, which has a study with many different gene panels this resulted in a speed-up from ~ 90-120 seconds to 5 seconds.","Read more about the v3.2.2 release here"]},{"i":"january-30-2020","l":"January 30, 2020","p":["Enhancement: Show HGVSg in mutations table and linkout to Genome Nexus:","hgvsg genome nexus","Enhancement: Add a pencil button near gene list in results page which opens interface for quickly modifying the oql of the query:","edit query pencil","See more updates here"]},{"i":"january-29-2020","l":"January 29, 2020","p":["Added data consisting of 197 samples from 2 studies:","Bladder/Urinary Tract Cancer (MSK, 2019) 78 samples","Upper Tract Urothelial Carcinoma (MSK, 2019) 119 samples"]},{"i":"december-19-2019","l":"December 19, 2019","p":["Enhancement: We restored support for submitting large queries from external applications using HTTP POST requests. Accepted parameters are the same as appear in the url of a query submitted from the homepage.","See more updates here"]},{"i":"december-12-2019","l":"December 12, 2019","p":["Enhancement: Several enhancements to the display of gene panels on the Patient View page, by The Hyve, described in more detail here","image","Enhancement: Add Count Bubbles to Oncoprint Toolbar","Screenshot from 2019-12-06 11-36-21","See more updates here"]},{"i":"november-29-2019","l":"November 29, 2019","p":["Enhancement: Support group comparison for custom charts in Study View page","Enhancement: Performance improvement of Co-Expression analysis.","Enhancement: Kaplan-Meier plots now supports custom time range.","See more updates here"]},{"i":"november-22-2019","l":"November 22, 2019","p":["New Feature: Support for Treatment response data in the Oncoprint and Plots tab, including new Waterfall plot type. Read more in The Hyve's blog post","image"]},{"i":"november-15-2019","l":"November 15, 2019","p":["Enhancement: heatmap tracks in OncoPrint now has separate headers and sub-menus. 
example","image","Enhancement: global settings for query session"]},{"i":"november-7-2019","l":"November 7, 2019","p":["Added data consisting of 212 samples from 3 studies:","Metastatic Melanoma (DFCI, Science 2015) 110 samples","Melanoma (MSKCC, NEJM 2014) 64 samples","Metastatic Melanoma (UCLA, Cell 2016) 38 samples"]},{"i":"october-30-2019","l":"October 30, 2019","p":["Added data consisting of 178 samples from 2 studies:","Intrahepatic Cholangiocarcinoma (Shanghai, Nat Commun 2014) 103 samples","Non-Small Cell Lung Cancer (MSK, Cancer Cell 2018) 75 samples"]},{"i":"october-23-2019","l":"October 23, 2019","p":["Enhancement: Quick example links in Plots tab. example"]},{"i":"october-14-2019","l":"October 14, 2019","p":["New Feature: Fusion Genes table in Study View. example","image"]},{"i":"october-11-2019","l":"October 11, 2019","p":["Enhancement: The Download interface on the homepage has been removed. Enhanced download functionality is now available after querying on the results page.","Home page:","homepage download tab removed","Results page:","results page download tab","Note that as before one can always download the full raw data on the Data Sets page or from Datahub."]},{"i":"october-9-2019","l":"October 9, 2019","p":["Added data consisting of 2725 samples from 4 studies:","Cancer Cell Line Encyclopedia (Broad, 2019) 1739 samples","Chronic Lymphocytic Leukemia (Broad, Nature 2015) 537 samples","Rectal Cancer (MSK,Nature Medicine 2019) 339 samples","Colon Cancer (CPTAC-2 Prospective, Cell 2019) 110 samples","Updated Esophageal Carcinoma (TCGA, Nature 2017) with addition of CNA data for Esophageal Squamous Cell Carcinoma cases 90 samples."]},{"i":"september-18-2019","l":"September 18, 2019","p":["New Feature: The list and order of charts of a study will be automatically saved now as a user preference on the study view page."]},{"i":"september-6-2019","l":"September 6, 2019","p":["Added data consisting of 1216 samples from 3 studies:","Breast Cancer (MSKCC, 
2019) 70 samples","Brain Tumor PDXs (Mayo Clinic, 2019) 97 samples","Adenoid Cystic Carcinoma Project (2019) 1049 samples"]},{"i":"august-13-2019","l":"August 13, 2019","p":["Added data consisting of 295 samples from 3 studies:","Pediatric Preclinical Testing Consortium (PPTC, 2019) 261 samples","Non-small cell lung cancer (MSK, Science 2015) 16 samples","Prostate Cancer (MSK, 2019) 18 samples"]},{"i":"july-26-2019","l":"July 26, 2019","p":["Added data consisting of 35 samples from 1 study:","Added Hypoxia data for:","Brain Lower Grade Glioma (TCGA, PanCancer Atlas)","Breast Invasive Carcinoma (TCGA, PanCancer Atlas)","Cervical Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Clear Cell Renal Cell Carcinoma (DFCI, Science 2019) 35 samples","Colorectal Adenocarcinoma (TCGA, PanCancer Atlas)","Glioblastoma Multiforme (TCGA, PanCancer Atlas)","Head and Neck Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Clear Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Papillary Cell Carcinoma (TCGA, PanCancer Atlas)","Liver Hepatocellular Carcinoma (TCGA, PanCancer Atlas)","Lung Adenocarcinoma (TCGA, PanCancer Atlas)","Lung Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas)","Pancreatic Adenocarcinoma (TCGA, PanCancer Atlas)","Pheochromocytoma and Paraganglioma (TCGA, PanCancer Atlas)","Prostate Adenocarcinoma (TCGA, PanCancer Atlas)","Skin Cutaneous Melanoma (TCGA, PanCancer Atlas)","Thyroid Carcinoma (TCGA, PanCancer Atlas)","Uterine Corpus Endometrial Carcinoma (TCGA, PanCancer Atlas)"]},{"i":"july-24-2019","l":"July 24, 2019","p":["Added data consisting of 151 samples from 1 study:","Myeloproliferative Neoplasms (CIMR, NEJM 2013) 151 samples"]},{"i":"july-13-2019","l":"July 13, 2019","p":["Public Release 6.1 of AACR Project GENIE:","The sixth data set, GENIE 6.0-public, was released in early July 2019. A patch to GENIE 6.0-public, GENIE 6.1-pubic, was subsequently released on July 13, 2019. 
The combined data set now includes nearly 70,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for nearly 80 major cancer types, including data from nearly 11,000 patients with lung cancer, greater than 9,700 patients with breast cancer, and nearly 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-2-2019","l":"July 2, 2019","p":["Added data consistng of 785 samples from 4 studies:","Non-Small Cell Lung Cancer (TRACERx, NEJM 2017) 327 samples","Acute myeloid leukemia or myelodysplastic syndromes (WashU, 2016) 136 samples","Basal Cell Carcinoma (UNIGE, Nat Genet 2016) 293 samples","Colon Adenocarcinoma (CaseCCC, PNAS 2015) 29 samples"]},{"i":"june-19-2019","l":"June 19, 2019","p":["New Feature: Show Genome Aggregation Database (gnomAD) population frequencies in the mutations table - see example:","gnomad feature news"]},{"i":"june-12-2019","l":"June 12, 2019","p":["Added data of 1350 samples from 3 studies:","Pheochromocytoma and Paraganglioma (TCGA, Cell 2017) 178 samples","Metastatic Solid Cancers (UMich, Nature 2017) 500 samples","Acute Myeloid Leukemia (OHSU, Nature 2018) 672 samples","Added survival data for TCGA PanCan Atlas Cohorts (>10,000 samples across 33 tumor types).","Added hypoxia data for Bladder Urothelial Carcinoma (TCGA, PanCancer Atlas)"]},{"i":"june-7-2019","l":"June 7, 2019","p":["New Group Comparison Feature: Compare clinical and genomic features of user-defined groups of samples/patients. 
View Tutorial","group-comparison"]},{"i":"may-8-2019","l":"May 8, 2019","p":["New Feature: Show Post Translational Modification (PTM) information from dbPTM on the Mutation Mapper- see example:","ptm feature_news"]},{"i":"april-26-2019","l":"April 26, 2019","p":["Added data of 568 samples from 4 studies:","Adenoid Cystic Carcinoma (JHU, Cancer Prev Res 2016) 25 samples","Histiocytosis Cobimetinib (MSK, Nature 2019) 52 samples","Upper Tract Urothelial Carcinoma (Cornell/Baylor/MDACC, Nat Comm 2019) 47 samples","Metastatic Prostate Adenocarcinoma (SU2C/PCF Dream Team, PNAS 2019) 444 samples"]},{"i":"march-29-2019","l":"March 29, 2019","p":["New Feature: Use the new quick search tab on the homepage to more easily navigate to a study, gene or patient:","quick_search_news"]},{"i":"march-15-2019","l":"March 15, 2019","p":["Added data of 338 samples from 4 studies:","Adenoid Cystic Carcinoma (MGH, Nat Gen 2016) 10 samples","Gallbladder Cancer (MSK, Cancer 2018) 103 samples","The Metastatic Prostate Cancer Project (Provisional, December 2018) 19 samples","Adult Soft Tissue Sarcomas (TCGA, Cell 2017) 206 samples"]},{"i":"february-22-2019","l":"February 22, 2019","p":["Enhancement: Exon number and HGVSc annotations are available in optional columns in the Mutations tab on the Results page and in the Patient View.","New feature: option to a show regression line in the scatter plot in the Plots tab on the Results page","image"]},{"i":"february-19-2019","l":"February 19, 2019","p":["New feature: Copy-Number Segments tab on the Study View page using igv.js v2- see example","Improved Copy-Number Segments tab on the Results page","New feature: OncoKB and Cancer Hotspots tracks in the Mutations tab on the Results page","image"]},{"i":"january-24-2019","l":"January 24, 2019","p":["Added data of 2328 samples from 8 studies:","Uveal Melanoma (QIMR, Oncotarget 2016) 28 samples","Squamous Cell Carcinoma of the Vulva (CUK, Exp Mol Med 2018) 15 samples","TMB and Immunotherapy (MSKCC, Nat 
Genet 2019) 1661 samples","Glioma (MSK, 2018) 91 samples","Urothelial Carcinoma (Cornell/Trento, Nat Gen 2016) 72 samples","Hepatocellular Carcinoma (MSK, Clin Cancer Res 2018) 127 samples","MSK Thoracic PDX (MSK, Provisional) 139 samples","Cholangiocarcinoma (MSK, Clin Cancer Res 2018) 195 samples","Updated data for The Metastatic Breast Cancer Project (Provisional, October 2018) 237 samples"]},{"i":"january-10-2019","l":"January 10, 2019","p":["cBioPortal now supports queries for driver mutations, fusions and copy number alterations as well as germline/somatic mutations using Onco Query Language (OQL)-- see example","A new tutorial explores OQL and provides examples of how OQL can be a powerful tool to refine queries."]},{"i":"december-17-2018","l":"December 17, 2018","p":["The 10th phase of cBioPortal architectural upgrade is now complete: the Study View has been moved to the new architecture with numerous improvements. This marks the completion of the cBioPortal architectural refactoring! 
\uD83C\uDF89\uD83C\uDF89\uD83C\uDF89","image"]},{"i":"october-29-2018","l":"October 29, 2018","p":["The ninth phase of the cBioPortal architectural upgrade is now complete: the results page is now a single-page application with better performance.","Supported plotting mutations by type in Plots tab","image"]},{"i":"october-19-2018","l":"October 19, 2018","p":["Support selection of transcript of interest in the MutationMapper tool via Genome Nexus.","mutation_mapper_dropdown"]},{"i":"october-17-2018","l":"October 17, 2018","p":["Added data of 3578 samples from 8 studies:","Rhabdoid Cancer (BCGSC, Cancer Cell 2016) 40 samples","Diffuse Large B-Cell Lymphoma (Duke, Cell 2017) 1001 samples","Diffuse Large B cell Lymphoma (DFCI, Nat Med 2018) 135 samples","Breast Fibroepithelial Tumors (Duke-NUS, Nat Genet 2015) 22 samples","Uterine Clear Cell Carcinoma (NIH, Cancer 2017) 16 samples","Endometrial Cancer (MSK, 2018) 197 samples","Breast Cancer (MSK, Cancer Cell 2018) 1918 samples","MSS Mixed Solid Tumors (Van Allen, 2018) 249 samples","Updated data for The Angiosarcoma Project (Provisional, September 2018) 48 samples"]},{"i":"august-20-2018","l":"August 20, 2018","p":["Now you can log in on the public cBioPortal with your Google account and save your virtual studies for quick analysis.","image"]},{"i":"august-7-2018","l":"August 7, 2018","p":["The eighth phase of the cBioPortal architectural upgrade is now complete: The Plots, Expression, Network, and Bookmarks tabs, and therefore all analysis tabs in the results page, have been moved to the new architecture.","Updated the MutationMapper tool, now connecting to Genome Nexus for annotating mutations on the fly.","Total Mutations and Fraction Genome Altered are now available in Plots tab for visualization and analysis.","Enhanced clinical attribute selector for OncoPrint, now showing sample counts per attribute.","image"]},{"i":"july-27-2018","l":"July 27, 2018","p":["Added data of 2787 samples from 10 studies:","Mixed 
Tumors (PIP-Seq 2017) 103 samples","Nonmuscle Invasive Bladder Cancer (MSK Eur Urol 2017) 105 samples","Pediatric Neuroblastoma (TARGET, 2018) 1089 samples","Pediatric Pan-Cancer (DKFZ - German Cancer Consortium, 2017) 961 samples","Skin Cutaneous Melanoma (Broad, Cancer Discov 2014) 78 samples","Cutaneous Squamous Cell Carcinoma (MD Anderson, Clin Cancer Res 2014) 39 samples","Diffuse Large B-cell Lymphoma (BCGSC, Blood 2013) 53 samples","Non-Hodgkin Lymphoma (BCGSC, Nature 2011) 14 samples","Chronic lymphocytic leukemia (ICGA, Nat 2011) 105 samples","Neuroblastoma (Broad Institute 2013) 240 samples"]},{"i":"june-20-2018","l":"June 20, 2018","p":["The seventh phase of the cBioPortal architectural upgrade is now complete: The Enrichments and Co-Expression tabs have been moved to the new architecture.","Supported merged gene tracks in OncoPrint and Onco Query Language-- see example","image"]},{"i":"may-10-2018","l":"May 10, 2018","p":["Enhanced OncoPrint to show germline mutations -- see example","image"]},{"i":"april-17-2018","l":"April 17, 2018","p":["Acute Lymphoblastic Leukemia (St Jude, Nat Genet 2016) 73 samples","Added data of 3416 samples from 10 published studies:","Added data of 3732 samples from 4 TARGET studies:","Bladder Cancer (TCGA, Cell 2017) 413 samples","Colorectal Cancer (MSK, Cancer Cell 2018) 1134 samples","Metastatic Esophagogastric Cancer (MSK,Cancer Discovery 2017) 341 samples","Non-Small Cell Lung Cancer (MSK, JCO 2018) 240 samples","Pediatric Acute Lymphoid Leukemia - Phase II (TARGET, 2018) 1978 samples","Pediatric Acute Myeloid Leukemia (TARGET, 2018) 1025 samples","Pediatric Rhabdoid Tumor (TARGET, 2018) 72 samples","Pediatric Wilms' Tumor (TARGET, 2018) 657 samples","Prostate Adenocarcinoma (EurUrol, 2017) 65 samples","Prostate Adenocarcinoma (MSKCC/DFCI, Nature Genetics 2018) 1013 samples","Small-Cell Lung Cancer (Multi-Institute 2017) 20 samples","The Angiosarcoma Project (Provisional, February 2018) 14 samples","Updated Segment data 
and Allele Frequencies for The Metastatic Breast Cancer Project (Provisional, October 2017) 103 samples"]},{"i":"april-5-2018","l":"April 5, 2018","p":["Acute Myeloid Leukemia (TCGA, PanCancer Atlas)","Added data from the TCGA PanCanAtlas project with >10,000 samples from 33 tumor types:","Adrenocortical Carcinoma (TCGA, PanCancer Atlas)","Bladder Urothelial Carcinoma (TCGA, PanCancer Atlas)","Brain Lower Grade Glioma (TCGA, PanCancer Atlas)","Breast Invasive Carcinoma (TCGA, PanCancer Atlas)","Cervical Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Cholangiocarcinoma (TCGA, PanCancer Atlas)","Colon Adenocarcinoma (TCGA, PanCancer Atlas)","Diffuse Large B-Cell Lymphoma (TCGA, PanCancer Atlas)","Esophageal Adenocarcinoma (TCGA, PanCancer Atlas)","Glioblastoma Multiforme (TCGA, PanCancer Atlas)","Head and Neck Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Chromophobe (TCGA, PanCancer Atlas)","Kidney Renal Clear Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Papillary Cell Carcinoma (TCGA, PanCancer Atlas)","Liver Hepatocellular Carcinoma (TCGA, PanCancer Atlas)","Lung Adenocarcinoma (TCGA, PanCancer Atlas)","Lung Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Mesothelioma (TCGA, PanCancer Atlas)","Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas)","Pancreatic Adenocarcinoma (TCGA, PanCancer Atlas)","Pheochromocytoma and Paraganglioma (TCGA, PanCancer Atlas)","Prostate Adenocarcinoma (TCGA, PanCancer Atlas)","Rectum Adenocarcinoma (TCGA, PanCancer Atlas)","Sarcoma (TCGA, PanCancer Atlas)","Skin Cutaneous Melanoma (TCGA, PanCancer Atlas)","Stomach Adenocarcinoma (TCGA, PanCancer Atlas)","Testicular Germ Cell Tumors (TCGA, PanCancer Atlas)","Thymoma (TCGA, PanCancer Atlas)","Thyroid Carcinoma (TCGA, PanCancer Atlas)","Uterine Carcinosarcoma (TCGA, PanCancer Atlas)","Uterine Corpus Endometrial Carcinoma (TCGA, PanCancer Atlas)","Uveal Melanoma (TCGA, PanCancer Atlas)"]},{"i":"march-20-2018","l":"March 20, 2018","p":["The sixth phase of the 
cBioPortal architectural upgrade is now complete: The Download tab has been moved to the new architecture.","Data can now be downloaded in tabular format from OncoPrint.","Added an option to download an SVG file on the Cancer Type Summary tab."]},{"i":"january-15-2018","l":"January 15, 2018","p":["The fifth phase of the cBioPortal architectural upgrade is now complete: The OncoPrint and Survival tabs have been moved to the new architecture."]},{"i":"november-20-2017","l":"November 20, 2017","p":["You can now combine multiple studies and view them on the study summary page. Example: liver cancer studies","You can now bookmark or share your selected samples as virtual studies with the share icon on the study summary page. Example: a virtual study of breast tumors","Cross-study query reimplemented: Now you can view an OncoPrint of multiple studies. Example: querying NSCLC tumors from 5 studies","image"]},{"i":"october-17-2017","l":"October 17, 2017","p":["The fourth phase of the cBioPortal architectural upgrade is now complete: The Mutual Exclusivity and Cancer Type Summary tabs have been moved to the new architecture.","Updated protein structure alignment data in Mutations tab are now retrieved from Genome Nexus via the G2S web service."]},{"i":"october-2-2017","l":"October 2, 2017","p":["Added data of 1646 samples from 7 published studies:","NGS in Anaplastic Oligodendroglioma and Anaplastic Oligoastrocytomas tumors (MSK, Neuro Oncol 2017) 22 samples","MSK-IMPACT Clinical Sequencing Cohort for Non-Small Cell Cancer (MSK, Cancer Discovery 2017) 915 samples","Paired-exome sequencing of acral melanoma (TGEN, Genome Res 2017) 38 samples","MSK-IMPACT Clinical Sequencing Cohort in Prostate Cancer (MSK, JCO Precision Oncology 2017) 504 samples","Whole-exome sequences (WES) of pretreatment melanoma tumors (UCLA, Cell 2016) 39 samples","Next generation sequencing (NGS) of pre-treatment metastatic melanoma samples (MSK, JCO Precision Oncology 2017) 66 samples","Targeted gene 
sequencing in 62 high-grade primary Unclassified Renal Cell Carcinoma (MSK, Nature 2016) 62 samples","Updated data for MSK-IMPACT Clinical Sequencing Cohort (MSK, Nat Med 2017) with overall survival data."]},{"i":"august-3-2017","l":"August 3, 2017","p":["The third phase of the cBioPortal architectural upgrade is now complete: The Mutations tab now has a fresh look and faster performance -- see example","image","Variant interpretations from the CIViC database are now integrated into the annotation columns on the Mutations tab and in the patient view pages","New summary graph for all cancer studies and samples on the front page"]},{"i":"june-26-2017","l":"June 26, 2017","p":["The second phase of the cBioPortal architectural upgrade is now complete: The query interface now has a fresh look and faster performance.","image"]},{"i":"may-12-2017","l":"May 12, 2017","p":["Added data of 12,211 samples from 11 published studies:","MSK-IMPACT Clinical Sequencing Cohort (MSK, Nat Med 2017) 10,945 samples","Whole-genome sequencing of pilocytic astrocytomasatic (DKFZ, Nat Genetics, 2013) 96 samples","Hepatocellular Carcinomas (INSERM, Nat Genet 2015) 243 samples","Cystic Tumor of the Pancreas (Johns Hopkins, PNAS 2011) 32 samples","Whole-Genome Sequencing of Pancreatic Neuroendocrine Tumors (ARC- Net, Nature, 2017) 98 samples","Medulloblastoma (Sickkids, Nature 2016) 46 samples","Genetic Characterization of NSCLC young adult patients (University of Turin, Lung Cancer 2016) 41 samples","Genomic Profile of Patients with Advanced Germ Cell Tumors (MSK, JCO 2016). 180 samples","Ampullary Carcinoma (Baylor, Cell Reports 2016) 160 samples","Mutational profiles of metastatic breast cancer (INSERM, 2016) 216 samples","Prostate Adenocarcinoma (Fred Hutchinson CRC, Nat Med 2016) 154 samples"]},{"i":"may-5-2017","l":"May 5, 2017","p":["First phase of cBioPortal architectural upgrade complete: Patient view now has fresh look and faster performance. 
example"]},{"i":"march-28-2017","l":"March 28, 2017","p":["New features:","Per-sample mutation spectra are now available in OncoPrints -- see example","image","mRNA heat map clustering is now supported in OncoPrints","MDACC Next-Generation Clustered Heat Maps are now available in the patient view","cBioPortal web site style change"]},{"i":"feburary-2-2017","l":"Feburary 2, 2017","p":["New features:","3D hotspot mutation annotations are now available from 3dhotspots.org","New data:","CPTAC proteomics data have been integrated for TCGA breast, ovarian, and colorectal provisional studies"]},{"i":"december-23-2016","l":"December 23, 2016","p":["New features:","Heat map visualization of gene expression data in the OncoPrint","OncoPrint Heatmap","Heat map visualization of gene expression data in the Study View page connecting to MDACC's TCGA Next-Generation Clustered Heat Map Compendium"]},{"i":"october-7-2016","l":"October 7, 2016","p":["New features:","All data sets can now be downloaded as flat files from the new Data Hub","Annotation of putative driver missense mutations in OncoPrints, based on OncoKB, mutation hotspots, and recurrence in cBioPortal and COSMIC","OncoPrint-OncoKB","Copy number segments visualization directly in the browser in a new CN Segments tab via IGV.js","image","Improvements:","Improved cancer study view page (bug fixes and increased performance)"]},{"i":"july-24-2016","l":"July 24, 2016","p":["Added data of 4,375 samples from 21 published studies:","Adenoid Cystic Carcinoma (FMI, Am J Surg Pathl. 2014) 28 samples","Adenoid Cystic Carcinoma (MDA, Clin Cancer Res 2015) 102 samples","Adenoid Cystic Carcinoma (Sanger/MDA, JCI 2013) 24 samples","Adenoid Cystic Carcinoma of the Breast (MSKCC, J Pathol. 
2015) 12 samples","Bladder Cancer, Plasmacytoid Variant (MSKCC, Nat Genet 2016) 34 samples","Breast Cancer (METABRIC, Nat Commun 2016) 1980 samples","Chronic Lymphocytic Leukemia (Broad, Cell 2013) 160 samples","Chronic Lymphocytic Leukemia (IUOPA, Nature 2015) 506 samples","Colorectal Adenocarcinoma (DFCI, Cell Reports 2016) 619 samples","Cutaneous T Cell Lymphoma (Columbia U, Nat Genet 2015) 42 samples","Diffuse Large B-Cell Lymphoma (Broad, PNAS 2012) 58 samples","Hepatocellular Adenoma (Inserm, Cancer Cell 2014) 46 samples","Hypodiploid Acute Lymphoid Leukemia (St Jude, Nat Genet 2013) 44 samples","Insulinoma (Shanghai, Nat Commun 2013) 10 samples","Malignant Pleural Mesothelioma (NYU, Cancer Res 2015) 22 samples","Mantle Cell Lymphoma (IDIBIPS, PNAS 2013) 29 samples","Myelodysplasia (Tokyo, Nature 2011) 29 samples","Neuroblastoma (Broad, Nat Genet 2013) 56 samples","New TCGA study:","OncoTree codes assigned per sample","Oral Squamous Cell Carcinoma (MD Anderson, Cancer Discov 2013) 40 samples","Pan-Lung Cancer (TCGA, Nat Genet 2016) 1144 samples","Pancreatic Adenocarcinoma (QCMG, Nature 2016) 383 samples","Recurrent and Metastatic Head & Neck Cancer (JAMA Oncology, 2016) 151 samples","RPPA data updated with the latest data from MD Anderson","Updated TCGA provisional studies","updated to the Firehose run of January 28, 2016"]},{"i":"june-6-2016","l":"June 6, 2016","p":["New features:","Annotation of mutation effect and drug sensitivity on the Mutations tab and the patient view pages (via OncoKB) oncokb-screenshot","Improvements:","Improved OncoPrint visualization using WebGL: faster, more zooming flexibility, visualization of recurrent variants","Improved Network tab with SBGN view for a single interaction","Performance improvement of tables in the study view page","Mutation type summary on the Mutations tab"]},{"i":"march-31-2016","l":"March 31, 2016","p":["New features:","Visualization of \"Enrichments Analysis\" results via volcano plots","Improved 
performance of the cross cancer expression view by switching to Plot.ly graphs","Improvements to the \"Clinical Data\" tab on the study view page","More customization options for the cross-cancer histograms","Performance improvements in the study view and query result tabs","Added data of 1235 samples from 3 published studies:","Merged Cohort of LGG and GBM (TCGA, 2016)","Lung Adenocarcinoma (MSKCC, 2015)","Poorly-Differentiated and Anaplastic Thyroid Cancers (MSKCC, JCI 2016)"]},{"i":"january-12-2016","l":"January 12, 2016","p":["Acinar Cell Carcinoma of the Pancreas (Johns Hopkins, J Pathol 2014)","Added data of 650 samples from 10 published studies:","All mutation data mapped to UniProt canonical isoforms","All TCGA data updated to the latest Firehose run of August 21, 2015","Bladder Urothelial Carcinoma (Dana Farber & MSKCC, Cancer Discovery 2014)","Cholangiocarcinoma (TCGA, Provisional)","Clear Cell Renal Cell Carcinoma (U Tokyo, Nat Genet 2013)","Desmoplastic Melanoma (Broad Institute, Nat Genet 2015)","Esophageal Squamous Cell Carcinoma (UCLA, Nat Genet 2014)","Gastric Adenocarcinoma (TMUCIH, PNAS 2015)","Low-Grade Gliomas (UCSF, Science 2014)","Mesothelioma (TCGA, Provisional)","Multiregion Sequencing of Clear Cell Renal Cell Carcinoma (IRC, Nat Genet 2014)","Neuroblastoma (AMC Amsterdam, Nature 2012)","New features:","New TCGA studies:","Primary Central Nervous System Lymphoma (Mayo Clinic, Clin Cancer Res 2015)","Testicular Germ Cell Cancer (TCGA, Provisional)","Thymoma (TCGA, Provisional)","Visualization of multiple samples in a patient","Visualization of timeline data of a patient ( example) timeline-example"]},{"i":"december-23-2015","l":"December 23, 2015","p":["New features:","Visualization of RNA-seq expression levels across TCGA studies (cross-cancer queries) cross cancer expression","Selection of genes in the study view to initiate queries query gene in study view","Improvement:","3-D structures in the \"Mutations\" tab are now rendered by 
3Dmol.js (previously JSmol)","Improved performance by code optimization and compressing large data by gzip"]},{"i":"december-1-2015","l":"December 1, 2015","p":["New feature: Annotated statistically recurrent hotspots, via new algorithm by Chang et al. 2015 Annotate recurrent hotspots"]},{"i":"november-9-2015","l":"November 9, 2015","p":["New features:","Links to MyCancerGenome.org for mutations Link to MyCancerGenome.org","Improved display of selection samples on the study view page","Improvements:","\"Enrichments\" analysis is now run across all genes","The \"Network\" tab is now using Cytoscape.js (Adobe Flash is no longer required)"]},{"i":"october-6-2015","l":"October 6, 2015","p":["Added data of 763 samples from 12 published studies:","Breast Invasive Carcinoma (TCGA, Cell 2015)","Cutaneous squamous cell carcinoma (DFCI, Clin Cancer Res 2015)","Ewing Sarcoma (Institut Cuire, Cancer Discov 2014)","Gallbladder Carcinoma (Shanghai, Nat Genet 2014)","Infant MLL-Rearranged Acute Lymphoblastic Leukemia (St Jude, Nat Genet 2015)","Microdissected Pancreatic Cancer Whole Exome Sequencing (UTSW, Nat Commun 2015)","New TCGA data:","Pancreatic Neuroendocrine Tumors (JHU, Science 2011)","Pediatric Ewing Sarcoma (DFCI, Cancer Discov 2014)","Prostate Adenocarcinoma (TCGA, in press)","Renal Non-Clear Cell Carcinoma (Genentech, Nat Genet 2014)","Rhabdomyosarcoma (NIH, Cancer Discov 2014)","Small Cell Lung Cancer (U Cologne, Nature 2015)","Thymic epithelial tumors (NCI, Nat Genet 2014)","Uterine Carcinosarcoma (JHU, Nat Commun 2014)","Uveal Melanoma (TCGA, Provisional)"]},{"i":"august-21-2015","l":"August 21, 2015","p":["All TCGA data updated to the Firehose run of April 16, 2015.","New feature: Enrichments Analysis finds alterations that are enriched in either altered or unaltered samples.","Improvement: improved OncoPrint with better performance."]},{"i":"june-3-2015","l":"June 3, 2015","p":["Improvements:","Allowed downloading data in each chart/table in study summary 
page.","Added log-rank test p-values to the survival plots in study summary page.","Improved visualization of patient clinical data in patient-centric view.","Added option to merge multiple samples for the same patient in OncoPrint."]},{"i":"april-28-2015","l":"April 28, 2015","p":["New features:","Redesigned query interface to allow selecting multiple cancer studies","Redesigned Plots tab"]},{"i":"january-20-2015","l":"January 20, 2015","p":["All TCGA data updated to the Firehose run of October 17, 2014","COSMIC data updated to V71","New features:","Query page: better search functions to find cancer studies","OncoPrints now support color coding of different mutation types","OncoPrints now support multiple clinical annotation tracks","OncoPrinter tool now supports mRNA expression changes Oncoprint with multiple clinical tracks"]},{"i":"january-6-2015","l":"January 6, 2015","p":["New feature: You can now view frequencies of mutations and copy-number alterations in the study view. These tables are updated dynamically when selecting subsets of samples. 
Alterations in heavily copy-number altered endometrial cancer cases"]},{"i":"december-9-2014","l":"December 9, 2014","p":["New TCGA data:","Added complete and up-to-date clinical data for all TCGA provisional studies","All TCGA data updated to the Firehose run of July 15, 2014","New TCGA provisional studies: Esophageal cancer, Pheochromocytoma and Paraganglioma (PCPG)","New published TCGA studies: Thyroid Cancer and Kidney Chromophobe","Added data of 172 samples from 4 published studies:","Cholangiocarcinoma (National University of Singapore, Nature Genetics 2012)","Cholangiocarcinoma (National Cancer Centre of Singapore, Nature Genetics 2013)","Intrahepatic Cholangiocarcinoma (Johns Hopkins University, Nature Genetics 2013)","Bladder Cancer (MSKCC, Eur Urol 2014)","New features:","Redesigned Mutual Exclusivity tab","Added correlation scores for scatter plots on the Plots tab","Download links to GenomeSpace"]},{"i":"october-24-2014","l":"October 24, 2014","p":["Added data of 885 samples from 11 published studies:","Colorectal Adenocarcinoma Triplets (MSKCC, Genome Biology 2014)","Esophageal Squamous Cell Carcinoma (ICGC, Nature 2014)","Malignant Peripheral Nerve Sheath Tumor (MSKCC, Nature Genetics 2014)","Melanoma (Broad/Dana Farber, Nature 2012)","Nasopharyngeal Carcinoma (National University Singapore, Nature Genetics 2014)","Prostate Adenocarcinoma CNA study (MSKCC, PNAS 2014)","Prostate Adenocarcinoma Organoids (MSKCC, Cell 2014)","Stomach Adenocarcinoma (TCGA, Nature 2014)","Stomach Adenocarcinoma (Pfizer and University of Hong Kong, Nature Genetics 2014)","Stomach Adenocarcinoma (University of Hong Kong, Nature Genetics 2011)","Stomach Adenocarcinoma (University of Tokyo, Nature Genetics 2014)"]},{"i":"august-8-2014","l":"August 8, 2014","p":["Released two new tools","Oncoprinter lets you create Oncoprints from your own, custom data","MutationMapper draws mutation diagrams (lollipop plots) from your custom data"]},{"i":"may-21-2014","l":"May 21, 
2014","p":["All TCGA data updated to the Firehose run of April 16, 2014"]},{"i":"may-12-2014","l":"May 12, 2014","p":["Improved study summary page including survival analysis based on clinical attributes e.g. TCGA Endometrial Cancer cohort Study view"]},{"i":"march-27-2014","l":"March 27, 2014","p":["New features:","Visualizing of mutations mapped on 3D structures (individual or multiple mutations, directly in the browser)","Gene expression correlation analysis (find all genes with expression correlation to your query genes)","The Patient-Centric View now displays mutation frequencies across all cohorts in cBioPortal for each mutation","The Mutation Details Tab and the Patient-Centric View now display the copy-number status of each mutation 3D viewer & Co-expression"]},{"i":"march-18-2014","l":"March 18, 2014","p":["Added mutation data of 898 samples from 11 published studies:","Added two new provisional TCGA studies:","Adrenocortical Carcinoma","All TCGA data updated to the Firehose run of January 15, 2014","Hepatocellular Carcinoma (AMC, Hepatology in press)","Hepatocellular Carcinoma (RIKEN, Nature Genetics 2012)","Medulloblastoma (Broad, Nature 2012)","Medulloblastoma (ICGC, Nature 2012)","Medulloblastoma (PCGP, Nature 2012)","Multiple Myeloma (Broad, Cancer Cell 2014)","NCI-60 Cell Lines (NCI, Cancer Res. 
2012)","Pancreatic Adenocarcinoma (ICGC, Nature 2012)","Small Cell Carcinoma of the Ovary (MSKCC, Nature Genetics in press)","Small Cell Lung Cancer (CLCGP, Nature Genetics 2012)","Small Cell Lung Cancer (Johns Hopkins, Nature Genetics 2012)","Updated to the latest COSMIC data (v68)","Uterine Carcinosarcoma"]},{"i":"december-9-2013","l":"December 9, 2013","p":["Added mutation data of 99 bladder cancer samples (BGI, Nature Genetics 2013)"]},{"i":"december-6-2013","l":"December 6, 2013","p":["Data sets matching four recently submitted or published TCGA studies are now available","Glioblastoma (Cell 2013)","Bladder carcinoma (Nature, in press)","Head & neck squamous cell carcinoma (submitted)","Lung adenocarcinoma (submitted)"]},{"i":"november-8-2013","l":"November 8, 2013","p":["All TCGA data updated to the Firehose run of September 23, 2013.","Updated to the latest COSMIC data (v67).","Added mutation data of 792 samples from 9 published cancer studies:","Esophageal Adenocarcinoma (Broad, Nature Genetics 2013)","Head and Neck Squamous Cell Carcinoma (Broad, Science 2011)","Head and Neck Squamous Cell Carcinoma (Johns Hopkins, Science 2011)","Kidney Renal Clear Cell Carcinoma (BGI, Nature Genetics 2012)","Prostate Adenocarcinoma, Metastatic (Michigan, Nature 2012)","Prostate Adenocarcinoma (Broad/Cornell, Nature Genetics 2012)","Prostate Adenocarcinoma (Broad/Cornell, Cell 2013)","Skin Cutaneous Melanoma (Yale, Nature Genetics 2012)","Skin Cutaneous Melanoma (Broad, Cell 2012)"]},{"i":"october-21-2013","l":"October 21, 2013","p":["Improved interface for survival plots, including information on individual samples via mouse-over","New fusion glyph in OncoPrints FGFR3 fusions in head and neck carcinoma","Improved cross-cancer query: new alteration frequency histogram (example below - query gene: CDKN2A) and mutation diagram Cross Cancer Query"]},{"i":"september-9-2013","l":"September 9, 2013","p":["Updated COSMIC data (v66 Release)","Improved / interactive visualization 
on the \"Protein changes\" tab","Enhanced mutation diagrams: color-coding by mutation time and syncing with table filters","Addition of DNA cytoband information in the patient view of copy-number changes","OncoPrints now allow the display of an optional track with clinical annotation (Endometrial cancer example below) Oncoprint with clinical track"]},{"i":"july-25-2013","l":"July 25, 2013","p":["Multi-gene correlation plots.","Variant allele frequency distribution plots for individual tumor samples.","Tissue images for TCGA samples in the patient view, via Digital Slide Archive. Example."]},{"i":"july-16-2013","l":"July 16, 2013","p":["All TCGA data updated to the May Firehose run (May 23, 2013).","TCGA Pancreatic Cancer study (provisional) added."]},{"i":"july-4-2013","l":"July 4, 2013","p":["Improved rendering of mutation diagrams, including ability to download in PDF format.","Improved home page: Searchable cancer study & gene set selectors, data sets selector."]},{"i":"june-17-2013","l":"June 17, 2013","p":["Improved interface for correlation plots, including information on individual samples via mouse-over.","Gene Details from Biogene are now available in the Network view.","Added mutation and copy number data from a new adenoid cystic carcinoma study: Ho et al., Nature Genetics 2013.","Added mutation data from 6 cancer studies.","Breast Invasive Carcinoma (Shah et al., Nature 2012)","Breast Invasive Carcinoma (Banerji et al., Nature 2012)","Breast Invasive Carcinoma (Stephens et al., Nature 2012)","Lung Adenocarcinoma (Imielinksi et al., Cell 2012)","Lung Adenocarcinoma (Ding et al., Nature 2008)","Colorectal Cancer (Seshagiri et al., Nature 2012)"]},{"i":"june-4-2013","l":"June 4, 2013","p":["All TCGA data updated to the April Firehose run (April 21, 2012)."]},{"i":"may-14-2013","l":"May 14, 2013","p":["Added a published TCGA study: Acute Myeloid Leukemia (TCGA, NEJM 2013)."]},{"i":"april-28-2013","l":"April 28, 2013","p":["All TCGA data updated to the March 
Firehose run (March 26, 2012).","mRNA percentiles for altered genes shown in patient view."]},{"i":"april-2-2013","l":"April 2, 2013","p":["All TCGA data updated to the February Firehose run (February 22, 2012)."]},{"i":"march-28-2013","l":"March 28, 2013","p":["All TCGA data updated to the January Firehose run (January 16, 2012).","Data from a new bladder cancer study from MSKCC has been added (97 samples, Iyer et al., JCO in press)."]},{"i":"february-16-2013","l":"February 16, 2013","p":["The cBio Portal now contains mutation data from all provisional TCGA projects. Please adhere to the TCGA publication guidelines when using these and any TCGA data in your publications.","All data updated to the October Firehose run (October 24, 2012).","Sequencing read counts and frequencies are now shown in the Mutation Details table when available.","Improved OncoPrints, resulting in performance improvements."]},{"i":"november-21-2012","l":"November 21, 2012","p":["Major new feature: Users can now visualize genomic alterations and clinical data of individual tumors, including:","Summary of mutations and copy-number alterations of interest","Clinical trial information","TCGA Pathology Reports","New cancer summary view(Example Endometrial Cancer)","Updated drug data from KEGG DRUG and NCI Cancer Drugs (aggregated by PiHelper)"]},{"i":"october-22-2012","l":"October 22, 2012","p":["All data updated to the Broad Firehose run from July 25, 2012.","COSMIC data added to Mutation Details (via Oncotator).","All predicted functional impact scores are updated to Mutation Assessor 2.0.","Users can now base queries on genes in recurrent regions of copy-number alteration (from GISTIC via Firehose).","The Onco Query Language (OQL) now supports queries for specific mutations or mutation types.","Data sets added that match the data of all TCGA publications (GBM, ovarian, colorectal, and lung squamous)."]},{"i":"july-18-2012","l":"July 18, 2012","p":["Mutation data for the TCGA lung squamous 
cell carcinoma and breast cancer projects (manuscripts in press at Nature).","All data updated to the latest Broad Firehose run(May 25, 2012).","Drug information added to the network view (via Drugbank).","Improved cross-cancer queries: Option to select data types, export of summary graphs.","Users can now base queries on frequently mutated genes (from MutSig via Firehose)."]},{"i":"may-16-2012","l":"May 16, 2012","p":["All data updated to the latest Broad Firehose run(March 21, 2012).","Extended cross-cancer functionality, enabling users to query across all cancer studies in our database.","New \"build a case\" functionality, enabling users to generate custom case sets, based on one or more clinical attributes.","New OncoPrint features, including more compact OncoPrints, and support for RPPA visualization."]},{"i":"february-27-2012","l":"February 27, 2012","p":["All data updated to the latest Broad Firehose run(January 24, 2012).","Validated mutation data for colorectal cancer.","New feature: Mutation Diagrams that show mutations in the context of protein domains. TP53 Mutations in Ovarian Cancer"]},{"i":"january-30-2012","l":"January 30, 2012","p":["Updated data for several TCGA cancer studies.","Some small bug-fixes."]},{"i":"december-22-2011","l":"December 22, 2011","p":["Fourteen new TCGA cancer studies: This includes complete data for TCGA Colorectal Carcinoma and provisional data for thirteen other cancer types in the TCGA production pipeline. Please note that data from these thirteen new cancer types are provisional, not final and do not yet include mutation data. As per NCI guidelines, preliminary mutation data cannot be redistributed until they have been validated. 
TCGA","Four new data types:","Reverse-phase protein array (RPPA) data.","microRNA expression and copy-number (including support for multiple loci)","RNA-Seq based expression data.","log2 copy-number data.","Updated TCGA GBM copy-number, expression, and methylation data.","New gene symbol validation service. You can now use gene aliases and/or Entrez Gene IDs within your gene sets.","Links to IGV for visualization of DNA copy-number changes.","Background information from the Sanger Cancer Gene Census.","Two new Tutorials to get you quickly started in using the portal."]},{"i":"november-14-2011","l":"November 14, 2011","p":["New and improved mutation details, with sorting and filtering capabilities.","In collaboration with Bilkent University, we have added a new Network tab to our results pages. The network tab enables users to visualize, analyze and filter cancer genomic data in the context of pathways and interaction networks derived from Pathway Commons. GBM Network"]},{"i":"september-3-2011","l":"September 3, 2011","p":["You can now query across different cancer studies (feature available directly from the home page).","Our MATLAB CGDS Cancer Genomics Toolbox is now available. The toolbox enables you to download data from the cBio Portal, and import it directly into MATLAB.","The code for the cBio Portal has now been fully open sourced, and made available at Google Code. If you would like to join our open source efforts and make the portal even better, drop us an email."]},{"i":"march-2-2011","l":"March 2, 2011","p":["New plotting features and other improvements:","Correlation plots that show the relationship between different data types for individual genes.","Survival analysis - assess survival differences between altered and non-altered patient sets.","Updated R Package with support for correlation plots and general improvements for retrieving and accessing data in R data frames.","The Web Interface now supports basic clinical data, e.g. 
survival data.","Networks for pathway analysis are now available for download. Survival Analysis"]},{"i":"december-15-2010","l":"December 15, 2010","p":["Several new features, including:","Redesigned and streamlined user interface, based on user feedback and usability testing.","Advanced support for gene-specific alterations. For example, users can now view mutations within TP53, and ignore copy number alterations, or only view amplifications of EGFR, and ignore deletions.","Improved performance.","Frequently Asked Questions document released.","Updated Video Tutorial(update: old link no longer functional. Now see: YouTube"]},{"i":"november-4-2010","l":"November 4, 2010","p":["Enhanced Oncoprints, enabling users to quickly visualize genomic alterations across many cases. Oncoprints now also work in all major browsers, including Firefox, Chrome, Safari, and Internet Explorer.","Official release of our Web Interface, enabling programmatic access to all data.","Official release of our R Package, enabling programmatic access to all data from the R platform for statistical computing. OncoPrints"]}],[{"l":"Genie News"},{"i":"september-20-2023","l":"September 20, 2023","p":["Public Release 14.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 14.0-public now contains 183,000 sequenced samples from nearly 160,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access."]},{"i":"may-1-2023","l":"May 1, 2023","p":["Public Release 13.1 of AACR Project GENIE:","The public release 13.1 version of AACR GENIE has 65 samples retracted that were present in AACR GENIE 13.0-public.","More detailed information can be found in the AACR GENIE release notes and the data releases page from Sage Bionetworks"]},{"i":"january-9-2023","l":"January 9, 2023","p":["Public Release 13.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 13.0-public now contains more than 167,000 sequenced samples from over 148,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"november-3-2022","l":"November 3, 2022","p":["Public Release BPC CRC 2.0-PUBLIC","The GENIE BPC CRC v2.0-public dataset contains 1,485 CRC patients from three institutions: MSKCC, DFCI, and VICC.","The complete, post-processed data are available on Synapse"]},{"i":"july-6-2022","l":"July 6, 2022","p":["Public Release GENIE ERBB2 Cohort","The study contains 315 samples from 135 patients from 6 institues."]},{"i":"july-22-2022","l":"July 22, 2022","p":["Public Release 12.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. 
The current release, GENIE 12.0-public, was released in July 2022.The registry now contains more than 154,000 sequenced samples from 137,000+ patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"may-19-2022","l":"May 19, 2022","p":["Public Release BPC NSCLC 2.0-PUBLIC","The GENIE BPC NSCLC v2.0-public dataset contains 1,846 NSCLC patients from 4 institutions: MSKCC, DFCI, VICC and UHN.","The complete, post-processed data are available on Synapse"]},{"i":"january-7-2022","l":"January 7, 2022","p":["Public Release 11.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 11.0-public, was released in January 2022. The registry now contains over 136,000 sequenced samples from over 121,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"june-22-2021","l":"June 22, 2021","p":["Public Release 10.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The tenth data set, GENIE 10.0-public, was released in June 2021. 
With the most recent data release, the registry now contains genomic information from 120953 samples.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"february-8-2021","l":"February 8, 2021","p":["Public Release 9.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The ninth data set, GENIE 9.0-public, was released in February 2021. With the most recent data release, the registry now contains genomic information from more nearly 17,000 non-small cell lung carcinomas, and nearly 12,000 breast and more than 11,000 colorectal cancers.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"july-7-2020","l":"July 7, 2020","p":["Public Release 8.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The eighth data set, GENIE 8.0-public, was released in July 2020. The combined data set now includes nearly 96,000 de-identified genomic records collected from patients who were treated at each of the consortium's 19 participating institutions, making it among the largest fully public cancer genomic data sets released to date. 
The combined data set now includes data for over 80 major cancer types, including data from greater than 14,000 patients with lung cancer, nearly 12,000 patients with breast cancer, and nearly 9,500 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-29-2020","l":"January 29, 2020","p":["Public Release 7.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The seventh data set, GENIE 7.0-public, was released in January 2020. The combined data set now includes nearly 79720 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for over 80 major cancer types.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-13-2019","l":"July 13, 2019","p":["Public Release 6.1 of AACR Project GENIE:","The sixth data set, GENIE 6.0-public, was released in early July 2019. A patch to GENIE 6.0-public, GENIE 6.1-pubic, was subsequently released on July 13, 2019. 
The combined data set now includes nearly 70,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for nearly 80 major cancer types, including data from nearly 11,000 patients with lung cancer, greater than 9,700 patients with breast cancer, and nearly 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-08-2019","l":"July 08, 2019","p":["Public Release 6.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The sixth data set, GENIE 6.0-public, was released in July 2019 adding more than 11,000 records to the database. The combined data set now includes nearly 71,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for over 80 major cancer types, including data from greater than 11,000 patients with lung cancer, nearly 9,800 patients with breast cancer, and more than 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-11-2019","l":"January 11, 2019","p":["Public Release 5.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The forth data set, GENIE 4.0-public, was released in July 2018 adding more than 7,800 records to the database. The combined data set now includes more than 59,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. This data will be released to the public every six months.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-16-2018","l":"July 16, 2018","p":["Public Release 4.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The forth data set, GENIE 4.0-public, was released in July 2018 adding more than 7,800 records to the database. The combined data set now includes more than 47,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. This data will be released to the public every six months. 
The public release of the fifth data set, GENIE 5.0-public, will take place in January, 2019.","The combined data set now includes data for over 80 major cancer types, including data from greater than 7,500 patients with lung cancer, nearly 5,500 patients with breast cancer, and more than 5,100 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"l":"January 2018","p":["Added 7500 samples to the GENIE Public Cohort The combined dataset now includes samples from over 60 major cancer types including:","6,000 lung cancer samples.","4,500 breast cancer samples.","4,300 colorectal cancer samples.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. For frequently asked questions, visit the AACR FAQ page."]},{"i":"november-20-2017","l":"November 20, 2017","p":["Public Release 2.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The second dataset was released in November 2017, adding more than 13,000 records to the database. The combined data set now includes over 32,000 de-identified genomic records collected from patients who were treated at each of the consortium’s participating institutions, making it among the largest fully public cancer genomic data sets released to date. 
These data will be continuously updated on a quarterly basis.","The combined data set now includes data for 59 major cancer types, including data on nearly 5,000 patients with lung cancer, nearly 4,000 patients with breast cancer, and more than 3,500 patients with colorectal cancer. For more details about the data, and how to use it, consult the data guide.","In addition to accessing the data on the AACR Project GENIE cBioPortal website, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-5-2017","l":"January 5, 2017","p":["Initial public release of AACR Project GENIE:","Somatic alterations in 18,980 tumor samples from 18,500 patients sequenced at eight different institutions.","Data is available for download from Sage Bionetworks."]},{"l":"AACR Project GENIE cBioPortal Terms of Use","p":["I will not attempt to identify or contact individual participants from whom these data were collected by any means.","I will not redistribute the data without express written permission from the AACR Project GENIE Coordinating Center (info at aacrgenie dot org).","When publishing or presenting work using or referencing the AACR Project GENIE dataset please include the following attributions:","Please cite: AACR Project GENIE Consortium. AACR Project GENIE: Powering Precision Medicine Through an International Consortium, Cancer Discov. 2017 Aug;7(8):818-831 and include the version of the dataset used.","Include the following acknowledgement: The authors would like to acknowledge the American Association for Cancer Research and its financial and material support in the development of the AACR Project GENIE registry, as well as members of the consortium for their commitment to data sharing. 
Interpretations are the responsibility of study authors.","Should you decide at any point in the future to stop using the AACR Project GENIE cBioPortal, send request: genie-cbioportal-access at cbio dot mskcc dot org and we will remove your user id and any other information provided during new user registration from our systems.","For more information see terms of access on the AACR website."]}],[{"l":"API and API Clients","p":["cBioPortal provides a REST API for programmatic access to the data. The visualizations one can see on the website leverage the same API. By connecting to the API directly, anyone can build their own visualizations/reports.","Please see the full reference documentation for the API here."]},{"l":"API Clients","p":["The cBioPortal REST API is described using Swagger/OpenAPI, which allows one to generate a client in most programming languages. One can use the command line tool curl for downloading data on the command line or use another language such as Python or R to make visualizations. We list some common examples below, but if your language is not listed, there is likely a client generator available elsewhere (see e.g. https://swagger.io/tools/swagger-codegen/). Do reach out if you'd like us to add a language."]},{"l":"R clients","p":["There are multiple ways to access the API using R. Below are two recommended R packages to access cBioPortal data."]},{"i":"cbioportaldata-recommended","l":"cBioPortalData (recommended)","p":["cBioPortalData aims to import all cBioPortal datasets as MultiAssayExperiment objects in Bioconductor. 
Some of its key features:","The MultiAssayExperiment class explicitly links all assays to the patient clinical/pathological data","The MultiAssayExperiment class provides a flexible API including harmonized subsetting and reshaping to convenient wide and long formats.","It provides complete datasets, not just for subsets of genes","It provides automatic local caching, thanks to BiocFileCache.","For a comprehensive user guide to cBioportalData see: https://waldronlab.io/cBioPortalData/articles/cBioPortalData.html","See also the workshop materials from our webinar which include an intro to cBioPortalData: https://github.com/cBioPortal/2020-cbioportal-r-workshop.","Note that one can point to private authenticated instances like this:"]},{"i":"cbioportalr-recommended","l":"cbioportalR (recommended)","p":["cbioportalR offers easy-to-use functions that allow users to browse and pull data from public or institutional cBioPortal sites without knowledge of web service or Bioconductor infrastructures. The package is tidyverse-compatible. Key package features include:","Comprehensive documentation aimed at helping clinical researchers understand the underlying structure of cBioPortal data","Tutorials for quick API authentication and set up","Functions to pull complete clinical and genomic data by study ID, molecular profile ID, sample list IDs or individual sample ID (e.g. 
get_genetics_by_study(), get_genetics_by_sample())","Functions to navigate and identify patient IDs, sample IDs or study IDs as needed, or infer necessary ID information for queries when not supplied by user.","Helper functions to pull information on gene panels ( get_gene_panel()), or lookup entrez ID ( get_entrez_id()), Hugo Symbol ( get_hugo_symbol()) or common gene aliases ( get_alias()) of genes","Capability to query multiple sample IDs from different studies concurrently","For a detailed tutorial on cbioportalR, see the package website: https://www.karissawhiting.com/cbioportalR/articles/overview-of-workflow.html"]},{"l":"rapiclient","p":["Although we recommend cBioPortalData or cbioportalR for most use cases, it is possible to connect to the API directly using rapiclient:"]},{"i":"cgdsr-will-be-deprecated","l":"CGDSR (will be deprecated)","p":["The CGDS-R package connects to an older version of our web API ( webservice.do). Although we will continue to keep webservice.do running for a while, we can't guarantee the same level of quality as our new API ( cbioportal.org/api) provides. Therefore we recommend that you use cBioPortalData instead."]},{"l":"Python client","p":["There are multiple ways to access the API using Python. 
One can use the bravado package to access the API directly, or use the cbio_py client, which provides a simple wrapper for the API and returns data in a format that is easy to work with."]},{"l":"bravado","p":["Generate a client in Python using bravado like this:","This allows you to access all API endpoints:","For easy tab completion you can add lower cases and underscores:","This example gets you all mutation data for the MSK-IMPACT 2017 study:","For a portal that requires authentication one can use (see Data Access Using Tokens):","A Jupyter notebook with more examples can be found here."]},{"l":"cbio_py","p":["See the cbio_py documentation: https://pypi.org/project/cbio-py/."]}],[{"l":"Deployment","p":["Private instances of cBioPortal are maintained by institutions and companies around the world.","An instance can be deployed using Docker (recommended) or by building and deploying from source. The source code of cBioPortal is available on GitHub under the terms of Affero GPL V3.","This section contains instructions for both of these paths.","Please note that installing a local version requires system administration skills; for example, installing and configuring Tomcat and MySQL. 
With limited resources, we cannot provide technical support on system administration."]}],[{"l":"Architecture Overview","p":["cBioPortal consists of the following components:","backend","MySQL database","REST API written in Java Spring","Redis cache for storing frequently used queries (optional)","validator checks file formats before importing data into the database","frontend built with React, Mobx and Bootstrap","session service for storing user saved data such as virtual studies and groups","REST API written in Java Spring enabling retrieval and writing to the database","MongoDB database","cBioPortal also uses the APIs from various external services to provide more information about a variant"]},{"l":"Backend","p":["The backend is written in Java and connects to a MySQL database to serve a REST API following the OpenAPI specification ( https://www.cbioportal.org/api/). Note that the repo where this lives in ( https://github.com/cBioPortal/cbioportal) also contains Java classes to import data as well as the validator. The backend can be configured to connect to a Redis cache to store database query results for improved performance.","The backend is organized as a multi-module Maven project. See cBioPortal backend code organization."]},{"l":"Validator","p":["The validator checks file formats before importing data into the database. There is a wrapper script metaImport.py that validates the data and subsequently calls the relevant Java classes to import the data."]},{"l":"Session Service","p":["The session service is used for storing user saved data such as virtual studies and groups. See the tutorials section to read more about these features. Session service is a Java app that serves a REST API backed by a Mongo database. The session service is served as a proxy through the cBioPortal backend REST API. The backend is therefore the only component that needs to be able to connect to it. 
The frontend does not connect to it directly."]},{"l":"Frontend","p":["The frontend is a single page app built with React, Mobx and Bootstrap. The data gets pulled from the backend REST API. The frontend is by default included with the backend so no extra setup is required."]},{"l":"External Services","p":["cBioPortal uses the APIs from several external services to provide more information about a variant:","OncoKB","CIVIC","Genome Nexus","G2S","For privacy concerns see the section: A note on privacy."]},{"l":"OncoKB","p":["OncoKB is a precision oncology knowledge base that contains information about the effects and treatment implications of specific cancer gene alterations. See the section OncoKB Data Access for how to configure external OncoKB service."]},{"l":"CIVIC","p":["CIVIC is a community-edited forum for discussion and interpretation of peer-reviewed publications pertaining to the clinical relevance of variants (or biomarker alterations) in cancer. For information on how to deploy this service yourself see: https://github.com/griffithlab/civic-server. It is also possible to disable showing CIVIC in cBioPortal by setting show.civic=false in the portal.properties(See portal.properties reference)."]},{"l":"Genome Nexus","p":["Genome Nexus is a comprehensive one-stop resource for fast, automated and high-throughput annotation and interpretation of genetic variants in cancer. For information on how to deploy this service yourself see: https://github.com/genome-nexus/genome-nexus. For more information on the various annotation sources and versions provided by Genome Nexus see: https://docs.genomenexus.org/annotation-sources."]},{"l":"G2S","p":["G2S (Genome to Structure) maps genomic variants to 3D structures. cBioPortal uses it on the mutations tab to show the variants on a 3D structure. 
For information on how to deploy this service yourself see: https://github.com/genome-nexus/g2s."]},{"l":"A note on privacy","p":["cBioPortal calls these services with variant information from the cBioPortal database. It however does not send over information that links a variant to a particular sample or patient. If this is a concern for your use case we recommend to deploy your own versions of these services. See the sections above to linkouts for instructions on how to do this."]}],[{"l":"Hardware Requirements","p":["Hardware requirements will vary depending on the volume of users you anticipate will access your cBioPortal instance and the amount of data loaded in the portal. We run cbioportal.org on an AWS r5.xlarge instance with 32 GB and 4 vCPUs. The public database consumes ~ 50 GB of disk space. The site is visited by several thousands of users a day. For on-premise installation recommendations one can look at the AWS instance type specs:","Platform","instance type","(v)CPUs","RAM(GB)","Storage (GB)","aws","r5.xlarge","4","32","50","on-premise","-","The hardware requirements are much lower when one has only a few users a day. Minimally, 2GB of RAM is needed to run a cBioPortal instance. If you do not plan to import public studies, depending on the size of your private data, 10GB of disk space may be sufficient.","Another possible consideration is caching. The portal can cache responses to requests so that repeated database queries are avoided. On the public cBioPortal deployment we enable this cache and allocate 1GB of additional RAM and 4GB of additional disk space for caching. For directions on configuring caching, see Ehcache Settings"]}],[{"l":"Deploy with Docker"},{"l":"Prerequisites","p":["Docker provides a way to run applications securely isolated in a container, packaged with all its dependencies and libraries. 
To learn more on Docker, kindly refer here: Docker overview.","Make sure that you have the latest version of Docker installed on your machine. Get latest version","Notes for non-Linux systems"]},{"l":"Usage instructions","p":["In this example we use Docker Compose to spin up all the different required containers/services for cBioPortal."]},{"l":"Quick Start","p":["You should now be able to see the cBioPortal website at http://localhost:8080","Import studies with:","Clear persistent data volumes with:"]},{"l":"Comprehensive Start"},{"i":"step-1---run-docker-compose","l":"Step 1 - Run Docker Compose","p":["Download the git repo that has the Docker compose file and go to the root of that folder:","Then download all necessary files (seed data, example config and example study from datahub) with the init script:","Then run:","This will start all four containers (services) defined here. That is:","the mysql database, which holds most of the cBioPortal data","the cBioPortal Java web app, this serves the React frontend as well as the REST API","the session service Java web app. This service has a REST API and stores session information (e.g. what genes are being queried) and user specific data (e.g. saved cohorts) in a separate mongo database","the mongo database that persists the data for the session service","It will take a few minutes the first time to import the seed database and perform migrations if necessary. Each container outputs logs to the terminal. For each log you'll see the name of the container that outputs it (e.g. cbioportal_container or cbioportal_session_database_container). If all is well you won't see any significant errors (maybe some warnings, that's fine to ignore). If all went well you should be able to visit the cBioPortal homepage on http://localhost:8080. 
You'll notice that no studies are shown on the homepage yet:","Go to the next step to see how to import studies."]},{"l":"Notes on detached mode","p":["If you prefer to run the services in detached mode (i.e. not logging everything to your terminal), you can run","In this mode, you'll have to check the logs of each container manually using e.g.:","You can list all containers running on your system with"]},{"i":"step-2---import-studies","l":"Step 2 - Import Studies","p":["To import studies you can run:","This will import the lgg_ucsf_2014 study into your local database. It will take a few minutes to import. After importing, restart the cbioportal web container:","or","All public studies can be downloaded from https://www.cbioportal.org/datasets, or https://github.com/cBioPortal/datahub/. You can add any of them to the ./study folder and import them. There's also a script (./study/init.sh) to download multiple studies. You can set DATAHUB_STUDIES to any public study id (e.g. lgg_ucsf_2014) and run ./init.sh."]},{"l":"Notes on restarting","p":["To avoid having to restart one can alternatively hit an API endpoint. To do so, call the /api/cache endpoint with a DELETE http-request (see here for more information):","The value of the API key is configured in the portal.properties file. You can visit http://localhost:8080 again and you should be able to see the new study."]},{"i":"step-3---customize-your-portalproperties-file","l":"Step 3 - Customize your portal.properties file","p":["The properties file can be found in ./config/portal.properties. Which was set up when running init.sh.","This properties file allows you to customize your instance of cBioPortal with e.g. custom logos, or point the cBioPortal container to e.g. use an external mysql database. See the properties documentation for a comprehensive overview.","If you would like to enable OncoKB see OncoKB data access for how to obtain a data access token. 
After obtaining a valid token use:"]},{"i":"step-4---customize-cbioportal-setup","l":"Step 4 - Customize cBioPortal setup","p":["To read more about the various ways to use authentication and parameters for running the cBioPortal web app see the relevant backend deployment documentation.","On server systems that can easily spare 4 GiB or more of memory, set the -Xms and -Xmx options to the same number. This should increase performance of certain memory-intensive web services such as computing the data for the co-expression tab. If you are using MacOS or Windows, make sure to take a look at these notes to allocate more memory for the virtual machine in which all Docker processes are running."]},{"l":"More commands","p":["For documentation on how to import a study, see this tutorial For more uses of the cBioPortal image, see this file","To Dockerize a Keycloak authentication service alongside cBioPortal, see this file."]},{"l":"Uninstalling cBioPortal"}],[{"l":"Import data with Docker"},{"l":"Import data instructions","p":["This is an example to import a sample study: study_es_0. When trying to import other studies, please follow the same routine:","import gene panels (if applicable, studies without gene panels are assumed to be whole exome/genome)","import study data"]},{"i":"step-1---import-gene-panels","l":"Step 1 - Import gene panels","p":["To import gene panels for your study, please reference the example commands in this file","These are the commands for importing study_es_0 gene panels ( data_gene_panel_testpanel1 and data_gene_panel_testpanel2):"]},{"i":"step-2---import-data","l":"Step 2 - Import data","p":["To import data for your study, please reference the example commands in this file","Command for importing study_es_0 data:","⚠️ after importing a study, remember to restart cbioportal to see the study on the home page. Run docker-compose restart cbioportal.","You have now imported the test study study_es_0. 
Note that this study is included inside the cbioportal container. The process for adding a study that is outside of the container is similar. Just make sure to add the data files in the ./study folder. This folder is mounted as /study/ inside of the container."]},{"l":"Frequently Asked Questions"},{"l":"Gene panel ID is not in database","p":["If you see an error like this when you importing the data: ERROR: data_gene_panel_matrix.txt: lines [2, 3, 4, (10 more)]: Gene panel ID is not in database. Please import this gene panel before loading study data.; values encountered: ['TESTPANEL1', 'TESTPANEL2']","please follow the first step to import gene panels (e.g. import data_gene_panel_testpanel1 and data_gene_panel_testpanel2 for study_es_0), then try to import the data again."]},{"l":"Error occurred during validation step","p":["Please make sure the seed database was correctly imported."]},{"i":"study-imported-correctly-but-got-error-when-trying-to-query-something","l":"Study imported correctly, but got error when trying to query something","p":["Remember to restart the cbioportal after data imported."]},{"l":"Import GRCh38 data","p":["If you are importing GRCh38 data, please remember to set the reference_genome: hg38 field in the meta_study.txt file. See also cancer study metadata."]}],[{"l":"Example commands"},{"l":"Importing gene panel","p":["Use this command to import a gene panel. Specify the gene panel file by replacing path_to_genepanel_file with the absolute path to the gene panel file. Another option is to add the gene panel files in ./study which is mounted inside the container on `/study/."]},{"l":"Importing data","p":["Use this command to validate a dataset. Add the study to the ./study folder. The command will connect to the web API of the container cbioportal-container, and import the study in its associated database. 
Make sure to replace path_to_report_folder with the absolute path where the html report of the validation will be saved.","⚠️ after importing a study, remember to restart cbioportal-container to see the study on the home page. Run docker-compose restart cbioportal."]},{"l":"Using cached portal side-data","p":["In some setups the data validation step may not have direct access to the web API, for instance when the web API is only accessible to authenticated browser sessions. You can use this command to generate a cached folder of files that the validation script can use instead. Make sure to replace path_to_portalinfo with the absolute path where the cached folder is going to be generated.","Then, grant the validation/loading command access to this folder and tell the script to use it instead of the API:"]},{"l":"Inspecting or adjusting the database"},{"l":"Deleting a study","p":["To remove a study, run:","Where study_id is the cancer_study_identifier of the study you would like to remove."]}],[{"l":"Authenticating and Authorizing Users using Keycloak in Docker","p":["This guide describes a way to Dockerise Keycloak along with cBioPortal, for authentication.","First, create an isolated network in which the Keycloak and MySQL servers can talk to one another.","Run a MySQL database in which Keycloak can store its data. This database server will not be addressable from outside the Docker network. Replace path_to_database with the absolute path where the folder kcdb-files will be placed. This folder is used by the database to store its files.","Then run the actual Keycloak server, using this image available from Docker Hub. This will by default connect to the database using the (non-root) credentials in the example above. The server will be accessible to the outside world on port 8180, so make sure to choose a strong administrator password.","The command below uses the default values for MYSQL_DATABASE, MYSQL_USER and MYSQL_PASSWORD(listed in the command above). 
If you wish to change these credentials, specify them in the command below. For instance, if MYSQL_USER in the database container is user, you need to add -e MYSQL_USER=user.","Finally, configure Keycloak and cBioPortal as explained in the Keycloak documentation. Remember to specify port 8180 for the Keycloak server, wherever the guide says 8080.","After configuring Keycloak, set up cBioPortal containers as specified in the documentation. Make sure to update the -Dauthenticate in the docker-compose file to -Dauthenticate=saml."]}],[{"l":"Software Requirements","p":["This page describes various system software required to run the cBioPortal."]},{"l":"MySQL","p":["The cBioPortal software should run properly on MySQL version 5.7.x. Versions higher than 5.7.x can cause an issue while loading the database schema. Minor versions lower than 5.7.x will cause issues with persistent cache invalidation. The software can be found and downloaded from the MySQL website.","On Ubuntu: sudo apt-get install mysql-server"]},{"l":"MongoDB","p":["The session service uses MongoDB 3.6.6"]},{"l":"Java","p":["As of this writing, the cBioPortal can be compiled and run from Java 11 and above. The software can be found and download from the Oracle website.","On Ubuntu: sudo apt-get install default-jdk"]},{"l":"Apache Maven","p":["The cBioPortal source code is an Apache Maven driven project. The software needs to be downloaded and installed prior to building the application from source code. It can be found on the Apache Maven website. 
We are currently using version 3.5.4.","On Ubuntu: sudo apt-get install maven"]},{"l":"Git","p":["You will need a git client to download the cBioPortal source code.","On Ubuntu: sudo apt-get install git"]}],[{"l":"Pre-Build Steps"},{"l":"Get the Latest Code","p":["Make sure that you have cloned the last code, and make sure you are on the master branch:"]},{"i":"prepare-the-log4jproperties-file","l":"Prepare the log4j.properties File","p":["This file configures logging for the portal. An example file is available within GitHub:","If you don't create your own logback.xml, maven will copy the EXAMPLE file to that location when it builds. If logback.xml already exists, it will just use that. This allows us to give you a working, versioned log config, which you can then override easily.","To modify the logging during tests the same EXAMPLE file can be copied to the relevant test resources folder."]},{"l":"Create the cBioPortal MySQL Databases and User","p":["You must create a cbioportal database and a cgds_test database within MySQL, and a user account with rights to access both databases. This is done via the mysql shell."]}],[{"l":"Building from Source"},{"l":"Building with Maven","p":["While building, you must point the environment variable PORTAL_HOME to the root directory containing the portal source code.","For example, run a command like the following if on macOS:","To compile the cBioPortal source code, move into the source directory and run the following maven command:","After this command completes, you will find a cbioportal.war file suitable for Apache Tomcat deployment in portal/target/. 
It is not necessary to install Tomcat yourself, since a command line runnable version of Tomcat is provided as a dependency in portal/target/dependency/webapp-runner.jar.","However, if you will be deploying to a standalone Tomcat installation, and if you have configured Tomcat to use the Redisson client for user session management, you should expect a clash between the Redisson client being used for session management and the Redisson client which is embedded in the cbioportal.war file for the optional \"redis\" persistence layer caching mode. In this case, you should avoid using the \"redis\" option for the portal property persistence.cache_type and you should prevent the Redisson client from being packaged in cbioportal.war by building with this command instead:"]},{"l":"alternative for standalone tomcat deployments which use redis session management"}],[{"l":"Importing the Seed Database","p":["The next step is to populate your cBioPortal instance with all the required background data sets. This includes for example gene data, ID mappings, and network interactions. Rather than importing each of these data sets individually, we have provided a simple \"seed\" database that you can import directly."]},{"l":"Download the cBioPortal Seed Database","p":["A cBioPortal seed database for human can be found on the datahub page. 
If you are looking for mouse, check this link.","After download, the files can be unzipped by entering the following command:"]},{"l":"Import the cBioPortal Seed Database","p":["Important: Before importing, make sure that you have followed the pre-build steps for creating the cbioportal database (see section \"Create the cBioPortal MySQL Databases and User\").","Import the database schema (/db-scripts/src/main/resources/cgds.sql):","Note that this may currently fail when using the default character encoding on MySQL 8.0 ( utf8mb4); this is why MySQL 5.7 (which uses latin1) is recommended.","Import the main part of the seed database:","Important: Replace seed-cbioportal_RefGenome_vX.Y.Z.sql with the downloaded version of the seed database, such as seed-cbioportal_hg19_v2.3.1.sql or seed-cbioportal_mm10_v2.3.1.sql.","(Human only) Import the Protein Data Bank (PDB) part of the seed database. This will enable the visualization of PDB structures in the mutation tab. Loading this file takes more time than loading the previous files, and is optional for users that do not require PDB structures.","Important: Replace seed-cbioportal_hg19_vX.Y.Z_only-pdb.sql with the downloaded version of the PDB database, such as seed-cbioportal_hg19_v2.3.1_only-pdb.sql.","(optional : support for microRNA genomic profiles) Import constructed gene table records for microRNA genomic profiles. Currently, cBioPortal supports the combined display of copy number alterations (generally reported for microRNA precursors) and expression (generally reported for microRNA mature forms) by adding gene table records which represent the combination of microRNA precursor and microRNA mature form. 
Appropriate aliases are added to the gene_alias table so that both the name of the precursor and the name of the mature form are recognized references to the combination.","After the code has been successfully configured and built, you can import the needed microRNA records by running the following command from the $PORTAL_HOME directory:","Important: Please be aware of the version of the seed database. In the README on datahub, we stated which version of cBioPortal is compatible with the current seed database.","If the database is older than what cBioPortal is expecting, the system will ask you (during startup or data loading) to migrate the database to a newer version. The migration process is described here."]}],[{"l":"Deploying the Web Application"},{"l":"Prepare the global configuration file","p":["The portal is configured using a global configuration file, portal.properties. An example file is available in the src/main/resources folder. Use it as a template to create your own:","For more information about the portal.properties file, see the reference page.","Several scripts of cBioPortal use this portal.properties file to get info like db connection parameters. You can indicate the folder where this file is with an environment variable:","if your properties file is at PORTAL_HOME/portal.properties"]},{"l":"Run cBioPortal Session Service","p":["The cBioPortal app requires session service. For instructions on how to run this without Docker see https://github.com/cBioPortal/session-service#run-without-docker. Once this is working, update the properties file:"]},{"l":"Run the cbioportal backend","p":["To run the app we use webapp-runner. It's a command line version of Tomcat provided by Heroku. All parameters can be seen with:","This runs the app in the foreground. If a port is already in use it will raise an error mentioning that. 
To change the port use the --port flag.","There are three main ways to run the portal: without authentication, with optional login and with required login. All of them require the cBioPortal session service to be running."]},{"l":"Without authentication","p":["In this mode users are able to use the portal, but they won't be able to save their own virtual studies and groups. See the optional login section to enable this."]},{"l":"Optional login","p":["In this mode users can see all the data in the portal, but to save their own groups and virtual studies they are required to log in. This will allow them to store user data in the session service. See the tutorials section to read more about these features.","Google and Microsoft live are supported as optional login currently. Possible values for authenticate are","One needs to set the Google/Microsoft related configurations in the portal.properties file:","See Google's Sign in Documentation to obtain these values.","See Microsoft Sign in Documentation to obtain these values."]},{"l":"Required login","p":["Change CHOOSE_DESIRED_AUTHENTICATION_METHOD to one of googleplus, saml, openid, ad, ldap. The various methods of authentication are described in the Authorization and Authentication section."]},{"l":"Property configuration","p":["The configuration defined in portal.properties can also be passed as command line arguments. 
The priority of property loading is as follows:","-D command line parameters overrides all","${PORTAL_HOME}/portal.properties","portal.properties supplied at compile time","Defaults defined in code","Note that the authenticate property is currently required to be set as a command line argument, it won't work when set in portal.properties(See issue #6109).","Some scripts require a ${PORTAL_HOME}/portal.properties file, so it is best to define the properties there."]},{"l":"Note for Tomcat Deployers","p":["Before we were using webapp-runner, our documentation recommended a system level installed Tomcat. In this case people might have been using dbconnector=jndi instead of the new default dbconnector=dbcp. There is a known issue where setting dbconnector in the properties file does not work (#6148). It needs to be set as a command line argument. For Tomcat this means CATALINA_OPT=-Ddbconnector=jndi."]},{"l":"Verify the Web Application","p":["Lastly, open a browser and go to: http://localhost:8080"]},{"l":"Important","p":["Each time you modify any java code, you must recompile and redeploy the app.","Each time you modify any properties (see customization options), you must restart the app","Each time you add new data, you must restart the app or call the /api/cache endpoint with a DELETE http-request (see here for more information)."]}],[{"l":"Loading a Sample Study","p":["Once you have confirmed that the cBioPortal server is installed, you are ready to import data. Importing a sample study is recommended to verify that everything is working correctly.","The cBioPortal distribution includes a small dummy study, study_es_0, which contains all datatypes supported by cBioPortal. 
This document describes how to import the prerequisites for the sample study and how to import the study itself."]},{"l":"Set the PORTAL_HOME environment variable","p":["Most cBioPortal command-line tools, including the data loading pipeline, expect the environment variable $PORTAL_HOME to point to a folder containing the portal.properties configuration file, as explained during the previous step.","Configure your shell to keep the variable set to the right folder. On GNU/Linux and macOS this usually means appending a line like the following to .bash_profile in your home directory:"]},{"l":"Import Gene Panel for Sample Study","p":["The sample gene panel has to be imported before gene panel study data can be added to the database.","After loading gene panels into the database, please restart Tomcat or call the /api/cache endpoint with a DELETE http-request(see here for more information) so that the validator can retrieve gene panel information from the cBioPortal API.","More details to load your own gene panel and gene set data can be found here: Import Gene Panels."]},{"l":"Validating the Sample Study","p":["First it's useful to validate the study study_es_0, to check if the data is formatted correctly.","To do so, go to the importer folder:","and then run the following command:","If all goes well, you should see the final output message:"]},{"l":"Importing the Sample Study","p":["To import the sample study:","and then run the following command:","You will see a series of output messages, hopefully ending with a status message like this:","After loading the study data, please restart the app or call the /api/cache endpoint with a DELETE http-request(see here for more information)."]}],[{"l":"User Authorization","p":["This step is only required if you intend on running an instance of the portal that supports user authorization.","Two tables need to be populated in order to support user authorization."]},{"i":"table--users","l":"Table: users","p":["This table 
contains all the users that have authorized access to the instance of the portal. The table requires a user's email address, name, and integer flag indicating if the account is enabled.","An example entry would be:","Note, if the ENABLED value is set to 0, the user will be able to login to the portal, but will see no studies.","You need to add users via MySQL directly. For example:"]},{"i":"table--authorities","l":"Table: authorities","p":["This table contains the list of cancer studies that each user is authorized to view. The table requires a user email address and an authority (e.g. cancer study) granted to the user.","Some example entries would be:","The value in the EMAIL column should be the same email address contained in the USER table.","The value in the AUTHORITY column is made of two parts:","The first part is the name of your portal instance. This name should also match the app.name property found in the portal.properties file.","Following a colon delimiter, the second part is the cancer_study_identifier of the cancer study this user has rights to access.","If the user has rights to all available cancer studies, a single entry with the keyword app.name: + \"ALL\" is sufficient (so e.g. \"cbioportal:ALL\").","You need to add users via MySQL directly. For example:","Important Note: The cancer study identifier is not CASE sEnsitive. So it can be UPPER CASE, or just how it is stored in the cancer_study table. Changes to these tables become effective the next time the user logs in."]},{"l":"Using groups","p":["It is also possible to define groups and assign multiple studies and users to a group. You can add a group name to the cancer_study table GROUPS column. This same group name can be used in the AUTHORITY column of the authorities table mentioned above."]},{"i":"example","l":"Example:","p":["We want to create the group \"TEST_GROUP1\" and assign two existing studies to it and give our user 'john.smith@gmail.com' access to this group of studies. 
Steps:","1- Find your studies in table cancer_study","2- Update the GROUPS field, adding your \"TEST_GROUP1\" to it. ⚠️ This is a ; separated column, so if you want a study to be part of multiple groups, separate them with ;.","If GROUPS already has a value (like for study 93 in example above) then add \";TEST_GROUP1\" to ensure existing groups are not overwritten.","3- Check the result:","4- Add the group to user 'john.smith@gmail.com', using app.name:+ \"TEST_GROUP1\" like so:","After next login, the user 'john.smith@gmail.com' will have access to these two studies."]},{"l":"Configuring PUBLIC studies","p":["To enable a set of public studies that should be visible to all users, without the need to configure this for each user in the authorities and users tables, you can set the property always_show_study_group in portal.properties file. For example, you can set:","This will enable the word \"PUBLIC\" to be used in the column GROUPS of the table cancer_study to indicate which studies should be always shown to any authenticated user, regardless of authorization configurations."]},{"i":"example-1","l":"Example:","p":["To reuse the example table above, let's assume the property always_show_study_group is set as indicated above and the cancer_study table contents are set to the following:","In this case, the study brca_tcga will be visible to any authenticated user while the study acc_tcga will be visible only to users configured to be part of GROUPB or TEST_GROUP1"]}],[{"l":"Introduction","p":["The cBioPortal includes support for SAML (Security Assertion Markup Language). This document explains why you might find SAML useful, and how to configure SAML within your own instance of cBioPortal.","Please note that configuring your local instance to support SAML requires many steps. This includes configuration changes and a small amount of debugging. 
If you follow the steps below, you should be up and running relatively quickly, but be forewarned that you may have to do a few trial runs to get everything working.","In the documentation below, we also provide details on how to perform SAML authentication via a commercial company: OneLogin. OneLogin provides a free tier for testing out SAML authentication, and is one of the easier options to get a complete SAML workflow set-up. Once you have OneLogin working, you should then have enough information to transition to your final authentication service."]},{"i":"what-is-saml","l":"What is SAML?","p":["SAML is an open standard that enables one to more easily add an authentication service on top of any existing web application. For the full definition, see the SAML Wikipedia entry.","In its simplest terms, SAML boils down to four terms:","identity provider: this is a web-based service that stores user names and passwords, and provides a login form for users to authenticate. Ideally, it also provides easy methods to add / edit / delete users, and also provides methods for users to reset their password. In the documentation below, OneLogin.com serves as the identity provider.","service provider: any web site or web application that provides a service, but should only be available to authenticated and authorized users. In the documentation below, the cBioPortal is the service provider.","authentication: a means of verifying that a user is who they purport to be. Authentication is performed by the identity provider, by extracting the user name and password provided in a login form, and matching this with information stored in a database. When authentication is enabled, multiple cancer studies can be stored within a single instance of cBioPortal while providing fine-grained control over which users can access which studies. 
Authorization is implemented within the core cBioPortal code, and not the identity provider."]},{"i":"why-is-saml-relevant-to-cbioportal","l":"Why is SAML Relevant to cBioPortal?","p":["The cBioPortal code has no means of storing user name and passwords and no means of directly authenticating users. If you want to restrict access to your instance of cBioPortal, you therefore have to consider an external authentication service. SAML is one means of doing so, and your larger institution may already provide SAML support. For example, at Sloan Kettering and Dana-Farber, users of the internal cBioPortal instances login with their regular credentials via SAML. This greatly simplifies user management."]},{"l":"Setting up an Identity Provider","p":["As noted above, we provide details on how to perform SAML authentication via a commercial company: OneLogin. If you already have an IDP set up, you can skip this part and go to Configuring SAML within cBioPortal.","OneLogin provides a free tier for testing out SAML authentication, and is one of the easier options to get a complete SAML workflow set-up. Once you have OneLogin working, you should then have enough information to transition to your final authentication service. 
As you follow the steps below, the following link may be helpful: How to Use the OneLogin SAML Test Connector.","To get started:","Register a new OneLogin.com Account"]},{"l":"Setting up a SAML Test Connector","p":["\"SAVE\" the app, then select the Configuration Tab.","ACS (Consumer) URL Validator*: ^ http://localhost:8080/cbioportal/saml/SSO$","ACS (Consumer) URL*: http://localhost:8080/saml/SSO","Add at least the parameters:","Audience: cbioportal","Configure these email parameters in the Users menu:","Email (Attribute)","Email (SAML NameID)","Find your user in the \"Users\" menu","Link the SAML app to your user (click \"New app\" on the + icon found on the top right of the \"Applications\" table to do this - see screenshot below):","Login to OneLogin.com.","Recipient: http://localhost:8080/saml/SSO","Search for SAML.","Select the option labeled: OneLogin SAML Test (IdP w/attr).","Under Apps, Select Add Apps.","Under the Configuration Tab for OneLogin SAML Test (IdP w/attr), paste the following fields (this is assuming you are testing everything via localhost)."]},{"l":"Downloading the SAML Test Connector Meta Data","p":["Go to the SSO Tab within OneLogin SAML Test (IdP), find the field labeled: Issuer URL. Copy this URL and download its contents. This is an XML file that describes the identity provider.","then, move this XML file to:","You should now be all set with OneLogin.com. Next, you need to configure your instance of cBioPortal."]},{"l":"Configuring SAML within cBioPortal"},{"l":"Creating a KeyStore","p":["In order to use SAML, you must create a Java Keystore.","This can be done via the Java keytool command, which is bundled with Java.","Type the following:","This will create a Java keystore for a key called: secure-key and place the keystore in a file named samlKeystore.jks. 
You will be prompted for:","keystore password (required, for example: apollo1)","your name, organization and location (optional)","key password for secure-key(required, for example apollo2)","When you are done, copy samlKeystore.jks to the correct location:","If you need to export the public certificate associated within your keystore, run:"]},{"l":"HTTPS and Tomcat","p":["⚠️ If you already have an official (non-self-signed) SSL certificate, and need to get your site running on HTTPS directly from Tomcat, then you need to import your certificate into the keystore instead. See this Tomcat documentation page for more details.","⚠️ An extra warning for when configuring HTTPS for Tomcat: use the same password for both keystore and secure-key. This seems to be an extra restriction by Tomcat."]},{"l":"Modifying configuration","p":["Within portal.properties, make sure that:","Then, modify the section labeled authentication. See SAML parameters shown in example below:","Please note that you will have to modify all the above to match your own settings. saml.idp.comm.binding.type can be left empty if saml.idp.comm.binding.settings=defaultBinding. The saml.logout.* settings above reflect the settings of an IDP that supports Single Logout (hopefully the default in most cases - more details in section below).","In the case that you are running cBioPortal behind a reverse proxy that handles the SSL certificates (such as nginx or traefik), you will have to also specify saml.sp.metadata.entitybaseurl. This should point to https://host.example.com:443. This setting is required such that cBioPortal uses the Spring SAML library appropriately for creating redirects back into cBioPortal.","In addition there is a known bug where redirect from the cBioPortal instance always goes over http instead of https ( https://github.com/cBioPortal/cbioportal/issues/6342). To get around this issue you can pass the full URL including https to the webapp-runner.jar command with e.g. 
--proxy-base-url https://mycbioportalinstance.org."]},{"l":"Custom scenarios","p":["ℹ️ Some settings may need to be adjusted to non-default values, depending on your IDP. For example, if your IDP required HTTP-GET requests instead of HTTP-POST, you need to set these properties as such:","If you need a very different parsing of the SAML tokens than what is done at org.cbioportal.security.spring.authentication.saml.SAMLUserDetailsServiceImpl, you can point the saml.custom.userservice.class to your own implementation:","⚠️ The properties saml.idp.metadata.attribute.email, and saml.idp.metadata.attribute.userName can also vary per IDP. It is important to set these correctly since these are a required field by the cBioPortal SAML parser (that is, if org.cbioportal.security.spring.authentication.saml.SAMLUserDetailsServiceImpl is chosen for property saml.custom.userservice.class).","⚠️ Some IDPs like to provide their own logout page (e.g. when they don't support the custom SAML Single Logout protocol). For this you can adjust the saml.logout.url property to a custom URL provided by the IDP. Also set the saml.logout.local=true property in this case to indicate that global logout (or Single Logout) is not supported by IDP:","⚠️ Some IDPs (e.g. Azure Active Directory) cache user data for more than 2 hours causing cbioportal to complain that the authentication statement is too old to be used. You can fix this problem by setting forceAuthN to true. Below is an example how you can do this with the properties. You can choose any binding type you like. bindings:HTTP-Redirect is given just as an example."]},{"l":"More customizations","p":["If your IDP does not have the flexibility of sending the specific credential fields expected by our default \"user details parsers\" implementation (i.e. 
security/security-spring/src/main/java/org/cbioportal/security/spring/authentication/saml/SAMLUserDetailsServiceImpl.java expects field mail to be present in the SAML credential), then please let us know via a new issue at our issue tracking system, so we can evaluate whether this is a scenario we would like to support in the default code. You can also consider adding your own version of the SAMLUserDetailsService class."]},{"l":"Authorizing Users","p":["Next, please read the Wiki page on User Authorization, and add user rights for a single user."]},{"i":"configuring-the-loginjsp-page-not-applicable-to-most-external-idps","l":"Configuring the Login.jsp Page (not applicable to most external IDPs)","p":["The login page is configurable via the portal.properties properties skin.authorization_message and skin.login.saml.registration_htm. For example, skin.authorization_message can be set to something like this:","and skin.login.saml.registration_htm can be set to:","You can also set a standard text in skin.login.contact_html that will appear in case of problems:"]},{"l":"Doing a Test Run","p":["You are now ready to go.","Rebuild the WAR file and follow the Deployment with authentication steps using authenticate=saml.","Then, go to: http://localhost:8080/.","If all goes well, the following should happen:","You will be redirected to the OneLogin Login Page.","After authenticating, you will be redirected back to your local instance of cBioPortal.","If this does not happen, see the Troubleshooting Tips below."]},{"l":"Troubleshooting Tips"},{"l":"Logging","p":["Getting this to work requires many steps, and can be a bit tricky. If you get stuck or get an obscure error message, your best bet is to turn on all DEBUG logging. This can be done via src/main/resources/logback.xml. For example:","Then, rebuild the WAR, redeploy, and try to authenticate again. 
Your log file will then include hundreds of SAML-specific messages, even the full XML of each SAML message, and this should help you debug the error."]},{"l":"Seeing the SAML messages","p":["Another tool we can use to troubleshoot is SAML tracer ( https://addons.mozilla.org/en-US/firefox/addon/saml-tracer/). You can add this to Firefox and it will give you an extra menu item in \"Tools\". Go through the login steps and you will see the SAML messages that are sent by the IDP."]},{"l":"Obtaining the Service Provider Meta Data File","p":["By default, the portal will automatically generate a Service Provider (SP) Meta Data File. You may need to provide this file to your Identity Provider (IP).","You can access the Service Provider Meta Data File via a URL such as:","http://localhost:8080/saml/metadata"]}],[{"l":"Authenticating Users via LDAP","p":["To connect cBioPortal to an external user database such as Active Directory will require the installation of Keycloak. Please read the Wiki page on Authenticating and Authorizing Users via Keycloak for information on how to connect the cBioPortal with Keycloak. You can also read how to connect Keycloak to Active Directory via LDAP on the User Storage Federation webpage of the Keycloak website."]}],[{"l":"Authenticating and Authorizing Users via Keycloak","p":["⚠️ This documentation is for keycloak =\" driver annotation sources in the settings menu of Results View can be hidden by turning off the following property (default: true):","\"COSMIC>=\" driver annotation sources in the settings menu of Results View can be hidden by turning off the following property (default: true):"]},{"l":"CIViC integration","p":["CIViC integration can be turned on or off with the following property (default: true):","The CIViC API url is set to https://civic.genome.wustl.edu/api/ by default. It can be overridden using the following property:"]},{"l":"Genome Nexus Integration","p":["Genome Nexus provides annotations of mutations in cBioPortal. 
The mutations tab relies heavily on the Genome Nexus API, therefore that tab won't work well without it. By default cBioPortal will use the public Genome Nexus API, such that no extra configuration is necessary."]},{"l":"Genome Build","p":["Genome Nexus supports both GRCh37 and GRCh38, but support for the latter is limited. Several annotation sources served by Genome Nexus might not have official GRCh38 support yet i.e. OncoKB, CIViC, Cancer Hotspots, My Cancer Genome and 3D structures. Although most of the time the canonical transcript for a gene will be the same between GRCh37 and GRCh38 there might be some that cause issues. In addition the complete integration of cBioPortal with Genome Nexus' GRCh38 is not complete yet. That is cBioPortal currently only connects to one Genome Nexus API by default (the GRCh37 one), so it's not possible to have multiple genome builds in one instance of cBioPortal and get the correct annotations from Genome Nexus for both. Currently only the mutation mapper tool page is able to handle both."]},{"l":"Properties","p":["By default the Genome Nexus API url is set to https://v1.genomenexus.org/, which uses GRCh37. It can be overridden using the following property:","Genome Nexus provides a set of mappings from Hugo genes names to Ensembl transcript IDs. There are two mappings: mskcc and uniprot. You can read more about the difference between those in the Mutation Data Annotation Section. The default is currently uniprot, but we recommend new installers to use mskcc and people with older installations to consider migrating. The property can be changed with:","The mutation mapper tool page can annotate GRCh38 coordinates. By default it uses https://grch38.genomenexus.org. 
It can be overridden by setting:","The GRCh38 annotation in mutation mapper can be hidden by setting show.mutation_mappert_tool.grch38=false, by default it's set to true."]},{"l":"MDACC Heatmap Integration","p":["MDACC Heatmap integration (button in OncoPrint heatmap dropdown and tab on Study page can be turned on or off by setting the following property:"]},{"l":"OncoPrint","p":["The default view in OncoPrint (\"patient\" or \"sample\") can be set with the following option. The default is \"patient\".","Configuration of tracks that will be visible by default in the oncoprint. It points to a JSON file on the classpath."]},{"l":"Custom annotation of driver and passenger mutations","p":["cBioPortal supports 2 formats to add custom annotations for driver and passenger mutations.","cbp_driver: This will define whether a mutation is a driver or not.","cbp_driver_tiers: This can be used to define multiple classes of driver mutations.","These data formats are described in the cBioPortal MAF specifications.","Enabling custom annotations in the OncoPrint","To enable functionality for one or both types of custom annotations, enter values for the following properties. These labels will appear in the OncoPrint's \"Mutation color\" menu.","Automatic selection of OncoKB, hotspots and custom annotations","OncoKB and Hotspots are by default automatically selected as annotation source, if show.oncokb and show.hotspots are set to true. To add automatic selection of custom driver or custom driver tiers annotations, set the respective property to true. Default is false.","If you want to disable the automatic selection of OncoKB and hotspots as annotation source, set these properties to false:","If you want to enable oncoprint heatmap clustering by default, set this property to true:","Automatic hiding of variants of unknown significance (VUS)","By default, the selection box to hide VUS mutations is unchecked. If you want to automatically hide VUS, set this property to true. 
Default is false."]},{"l":"Gene sets used for gene querying","p":["To change the gene sets used for gene querying, create a JSON file and add gene sets, following the format specified in the examples below. Set the path to this file (e.g. file:/cbioportal/custom_gene_sets.json) in the following property and restart Tomcat to apply the update. The default gene sets will be replaced by the ones in custom_gene_sets.json."]},{"l":"Example with gene names","p":["In this example, two gene sets will appear in the query page, under the names \"Prostate Cancer: AR Signaling\" and \"Prostate Cancer: AR and steroid synthesis enzymes\"."]},{"l":"Example with specific alterations","p":["In this example, only one gene set will appear in the query page, under the name \"Genes with alterations\", which will add the different genetic alterations stated below in the query box."]},{"l":"Example with merged gene tracks","p":["In this example, only one gene set will appear in the query page, under the name \"BRCA genes test\", containing the merged gene track called \"BRCA genes\".","This gene set will add the following in the query box:"]},{"l":"Cache Settings","p":["cBioPortal is supported on the backend with Ehcache or Redis. These caches are configurable from within portal.properties through the following properties.","The cache type is set using persistence.cache_type. Valid values are no-cache, redis(redis), ehache-heap(ehcache heap-only), ehache-disk(ehcache disk-only), and ehache-hybrid(ehcache disk + heap). By default, persistence.cache_type is set to no-cache which disables the cache. When the cache is disabled, no responses will be stored in the cache.","⚠️ the 'redis' caching option will likely cause a conflict when installing the portal in a Tomcat installation which uses redisson for session management. 
If you plan to deploy cbioportal to such a system, avoid the 'redis' caching option for persistence.cache_type and be sure to build cbioportal.war with the maven option -Dexclude-redisson(see Building with Maven).","Logged metrics and additional information such as cache size and cached keys are available through an optional endpoint. The optional endpoint is turned off by default but can be turned on by setting cache.statistics_endpoint_enabled to true.","The cache statistics endpoint is hidden on the api page; users must directly access the URL to view the response. The cache statistics endpoint can be accessed in the following ways.","For a list of all keys in the cache:","For a list of counts of keys in cache per repository class:","For general statistics about the cache such as memory usage (not currently implemented for Redis):","WARNING: It must be noted that since cache statistics endpoint returns data on cache keys, the endpoint may expose otherwise hidden database query parameters such as sample identifiers, study names, etc. Generally, it is recommended that the endpoint only be turned on during cache-related development for testing. Deployers of a protected portal where users only have authorities to a subset of studies should carefully consider whether or not to turn on the cache statistics endpoint, as it does not filter the results.","For more information on how caching is implemented in cBioPortal refer to the Caching documentation."]},{"l":"Redis","p":["To cache with Redis set persistence.cache_type to redis.","To setup the Redis cache servers the following properties are required:","If you are running one redis instance for multiple instances of cBioPortal, one can use the properties redis.name and redis.database to avoid clashes. If you are running only one instance of cBioPortal any value for name/database will do.","There are also some optional parameters:","redis.clear_on_startup: If true, the caches will clear on startup. 
This is important to do to avoid reading old study data from the cache. You may want to turn it off and clear redis yourself if you are running in a clustered environment, as you'll have frequent restarts that do not require you to clear the redis cache. redis.ttl_mins: The time to live of items in the general cache, in minutes. The default value is 10000, or just under 7 days.","For more information on Redis, refer to the official documentation here"]},{"l":"Ehcache","p":["To cache with Ehcache set persistence.cache_type to ehache-heap(ehcache heap-only), ehache-disk(ehcache disk-only), or ehache-hybrid(ehcache disk + heap).","Ehcache initializes caches using a template found in an Ehcache xml configuration file. When caching is enabled, set ehcache.xml_configuration to the name of the Ehcache xml configuration file. The default provided is ehcache.xml; to change the cache template, directly edit this file. Alternatively, you can create your own Ehcache xml configuration file, place it under /persistence/persistence-api/src/main/resources/ and set ehcache.xml_configuration to /[Ehcache xml configuration filename].","If the cache is configured to use disk resources, users must make a directory available and set it with the ehcache.persistence_path property. Ehcache will create separate directories under the provided path for each cache defined in the ehcache.xml_configuration file.","Cache size must be set for heap and/or disk depending on which are in use; Ehcache requires disk size to be greater than heap size in a hybrid configuration. Zero is not a supported size and will cause an exception. Units are in megabytes. Default values are provided. The general repository cache is specified to use 1024MB of heap and 4096MB of disk. The static repository cache is specified to use 30MB of heap and 32MB of disk. 
For installations with increased traffic or data, cache sizes can be increased to further improve performance.","For more information on Ehcache, refer to the official documentation here"]},{"i":"evict-caches-with-the-apicache-endpoint","l":"Evict caches with the /api/cache endpoint","p":["DELETE http requests to the /api/cache endpoint will flush the cBioPortal caches, and serves as an alternative to restarting the cBioPortal application.","By default the endpoint is disabled. The endpoint can be enabled by setting:","Access to the endpoint is not regulated by the configured user authorization mechanism. Instead, an API key should be passed with the X-API-KEY header. The accepted value for the API key can be configured by setting (for example):"]},{"l":"Delegate user-authorization cache to Spring-managed cache","p":["For evaluation of user permissions cBioPortal uses a user-authorization cache that is populated at startup. By setting the cache.cache-map-utils.spring-managed property to true this cache will be managed by the Spring caching solution such as EHCache or Redis. For more extended information, see here"]},{"l":"Enable GSVA functionality","p":["GSVA functionality can be enabled by uncommenting this line (and making sure it is set to true):"]},{"l":"Set default thresholds for geneset hierarchy"},{"l":"Collapses the tree widget of the geneset hierarchy dialog on initialization","p":["By default, the tree is expanded (property value is false)."]},{"l":"Cross study expression and protein data","p":["By default we hide expression data for multi-study queries as they are usually not normalized across studies. 
For the public cBioPortal for instance, only TCGA Pancan Atlas studies expression data has been normalized.","If you know the expression data in your instance is comparable, or is comparable for a subset of studies, you can configure a rule as follows.","The value of this property can be boolean (true|false) or a javascript function which executes at runtime and is passed the list of study objects being queried by the user and evaluates whether expression data can be safely displayed."]},{"l":"Combined Study View Summary Limits"},{"l":"Background","p":["A limit is added to prevent poor performance of Study View when selecting too large sample numbers."]},{"i":"properties-1","l":"Properties","p":["studyview.max_samples_selected: Limit is disabled when not set"]},{"l":"Behavior","p":["When these limits are exceeded the \"Explore Selected Studies\" button will be disabled on the Study View Page."]},{"l":"Request Body Compression"},{"i":"background-1","l":"Background","p":["Some REST endpoints that the cBioPortal frontend uses have request bodies that scale as your dataset increases. In portals where users commonly query more than 100,000 samples, we found that some of these request bodies could get as large as 20 Mb. These large request bodies pose a significant problem for users with poor upload speeds - some users experienced upload times of more than five minutes for these requests. Request body compression is our temporary solution to this problem. When this feature is toggled on, we compress the request bodies of a few problematic endpoints."]},{"i":"properties-2","l":"Properties","p":["There are two portal.property values related to this feature:","enable_request_body_gzip_compression: when true, the feature will be enabled.","request_gzip_body_size_bytes: the maximum allowable unzipped request body in bytes. Defaults to 80000000 (80 Mb)."]},{"i":"behavior-1","l":"Behavior","p":["This is a nonbreaking change. 
Any consumers of the cBioPortal API you have that send requests with uncompressed request bodies will continue to work, regardless of whether you turn this feature on or off.","If you turn this feature on, the cBioPortal API will now be able to handle any request with a gzipped request body, provided:","It is a POST request.","It has a Content-Encoding: gzip header."]},{"l":"Reasons to Enable This Feature","p":["You have studies with tens of thousands of samples.","You have users with poor upload speeds (< 1mb up)."]},{"l":"Reasons to Disable This Feature","p":["It is harder to debug gzipped requests","Chrome's copy request as CURL will not work.","The compressed request body is not human-readable.","It is a potential vector for denial of memory attacks.","Any request that has a body that takes significantly more space in memory than it does in the request body is potentially dangerous. We try to mitigate this by limiting the size of the unzipped request body via the request_gzip_body_size_bytes property, but at a fundamental level, this is still a concern.","Along these lines, if you do enable this feature, setting request_gzip_body_size_bytes to an arbitrarily large number would be unwise.","This is not a cure-all for performance issues","Most requests the cBioPortal makes do not have large request bodies, so most requests will not be compressed, and will see no performance improvement.","Users with good upload speeds will see minimal performance improvements, as their upload speed is not a bottleneck."]},{"i":"datasets-tab-study-download-links","l":"DataSets Tab (Study Download Links)"},{"i":"background-2","l":"Background","p":["The DataSets tab has the ability to create a download button that allows users to quickly download \"raw\" public studies."]},{"i":"properties-3","l":"Properties","p":["study_download_url: when set, the feature will be enabled"]},{"i":"behavior-2","l":"Behavior","p":["For private instances that want to replicate the public-portal they 
first must set up their studies they want available for download in a similar format to what is described in the Example section below. The studies are located on the public-portal at https://cbioportal-datahub.s3.amazonaws.com/. Then there is a study_list.json defined that lists the studies that can be downloaded. The studies to be downloaded need to be compressed with the extension tar.gz"]},{"l":"Example","p":["We have set study_download_url property to https://cbioportal-datahub.s3.amazonaws.com/","study_list.json resides https://cbioportal-datahub.s3.amazonaws.com/study_list.json","[ acbc_mskcc_2015, acc_2019] Example of contents","acbc_mskcc_2015.tar.gz resides https://cbioportal-datahub.s3.amazonaws.com/acbc_mskcc_2015.tar.gz"]},{"l":"Prioritized studies on study selector view","p":["By default, the studies loaded into a local cBioPortal instance are organized based on their cancer type (i.e. Breast >> Other).","The value of this variable will create a custom category with studies on the top of the study selector view. The format for the string should be category1#study1a,study1b,study1c;category2#study2 (e.g., PanCancer Studies#msk_impact_2017), where the category can be any string and the study should be the study ID of the required uploaded study."]}],[{"l":"Backend Caching","p":["cBioPortal provides the option of caching information on the backend to improve performance. Without caching, every time a request is received by the backend, a query is sent to the database system for information, and the returned data is processed to construct a response. This may lead to performance issues as the entire process can be rather costly, especially for queries on larger studies. With caching turned on, query responses can be taken directly from the cache if they have already been constructed. They would only be constructed for the initial query."]},{"l":"Cache Configuration","p":["The portal is configured to use Ehcache or Redis for backend caching. 
Ehcache supports a hybrid (disk + heap), disk-only, and heap-only mode. Redis stores the cache in memory and periodically writes the updated data to disk. Cache configuration is specified inside portal.properties (more information here)."]},{"l":"Creating additional caches","p":["The default configuration initializes two separate caches; however, you may wish to introduce new caches for different datatypes. Please see the Redis and Ehcache sections to see how to set up a new cache in whichever system you are using."]},{"l":"Redis","p":["Cache initialization is handled inside the CustomRedisCachingProvider. To create additional caches (e.g., creating a cache specifically for clinical data), new code must be added to the CustomRedisCachingProvider.","Within the CustomRedisCachingProvider, create your new cache using the CacheManager. The appName must be prepended to your cache name.","You also need to create a new cache resolver in RedisConfig.java:","The @Cacheable annotation must also be added (or adjusted) to function declarations to indicate which functions are to be cached. Those might look like this example:","For more information on linking caches to functions, refer to the documentation here."]},{"l":"Ehcache","p":["Within the CustomEhcachingProvider, initialize a new ResourcePoolsBuilder for the new cache and set the resources accordingly.","After initializing the ResourcePoolsBuilder, create a CacheConfiguration for the new cache using the new ResourcePoolsBuilder just created.","Finally, add the new CacheConfiguration to the map of managed caches with a name for the cache.","You also need to create a new cache resolver in EhCacheConfig.java:","The @Cacheable annotation must also be added (or adjusted) to function declarations to indicate which functions are to be cached. Those might look like this example:","Additionally, new properties for setting cache sizes should be added to portal.properties and loaded into the CustomEhcachingProvider. 
Alternatively, values may be hardcoded directly inside CustomEhcachingProvider.","For more information on cache templates and the Ehcache xml configuration file, refer to the documentation here.","For more information on linking caches to functions, refer to the documentation here."]},{"l":"User-authorization cache","p":["In addition to the above-mentioned Spring-managed caches, cBioPortal maintains a separate cache that holds references to sample lists, molecular profiles and cancer studies. This user-authorization cache is used to establish whether a user has access to the data of a particular sample list or molecular profile based on study-level permissions.","By default, the user-authorization cache is implemented as a HashMap that is populated when cBioPortal is started. This implementation allows for very fast response times of user-permission evaluation.","The user-authorization cache can be delegated to the Spring-managed caching solution by setting the cache.cache-map-utils.spring-managed to true. Depending on the implementation, this may add a delay to any data request that is caused by the additional consultation of the external cache. This configuration should only be used where a central caching solution is required or no instance/container-specific local caches are allowed. For example, cache eviction via the api/cache endpoint in a Kubernetes deployment of cBioPortal where multiple pods/containers that represent a single cBioPortal instance is possible with a Spring-managed user-authorization cache because a call to this endpoint in a single pod/container invalidates Redis caches for the entire deployment thereby preventing inconsistent state of user-authorization caches between pods."]},{"l":"Cache eviction","p":["When the database is updated (e.g new studies loaded, existing study updated, new gene panel imported) the caches of a cBioPortal instance should be updated. One way is to restart the cBioPortal spring application. 
When using Redis, this will work only when redis.clear_on_startup is not set to false.","Alternatively, caches can be cleared (a.k.a. evicted) by calling the /api/cache endpoint. The advantage of the cache eviction endpoint is that user-sessions remain undisturbed since the portal instance is not restarted. By default the cache eviction endpoint is disabled and can be enabled by setting cache.endpoint.enabled to true. The endpoint is secured by a secret API key that can be customized with the cache.endpoint.api-key property. Caches are evicted by making a DELETE request to the endpoint while passing the API key in the X-API-KEY header. When using curl use the following command (replace the API key with the value configured in portal.properties):"]},{"l":"Cache eviction after cancer study updates","p":["When a study is added, deleted or updated, a more selective cache eviction strategy is possible, where only affected cached data is evicted. This more selective cache eviction is triggered by calling the /api/cache/{studyId} endpoint where {studyId} is the cancer_study_identifier stated in the meta_study.txt file. When using curl use the following command after update of a study with study identifier my_cancer_study (replace the API key with the value configured in portal.properties):",":important: This endpoint can only be used when adding/deleting/updating a study. When data related to gene panels or gene sets is updated, all caches should be evicted with a call to /api/cache."]},{"i":"how-does-study-specific-cache-eviction-work","l":"How does study-specific cache eviction work?"},{"l":"Structure of cache keys","p":["The caching solutions integrated with cBioPortal (EHCache and Redis) store data as key-value pairs. Each key represents a method call signature that contains the Java class name, the method name and a serialized representation of all method arguments. 
For instance this is the key for a call to the CancerTypMyBatisRepository.getAllCancerTypes() method with arguments [ SUMMARY, 10000000, 0, null, ASC]:","Cached data that relates to a specific study can be recognized by the occurrence of the cancer study identifier anywhere in the method arguments. The study identifier can occur in the method arguments because it is passed as argument itself, like here for a study with identifier study_es_0:","Alternatively the study identifier is present as the prefix of referenced study entities. For example, this is the request for all molecular profiles:"]},{"l":"Cache eviction rules","p":["When a study is added, deleted or updated, all caches are evicted where the respective key meets any of these requirements:","The key contains the cancer study identifier of the study that is added, deleted or updated.","The key does not contain the cancer study identifier of any study present in the database.","The rationale behind rule 1. is that when a key references data for the affected study it points to potentially outdated data and its associated cache should be evicted. The rationale behind rule 2. is that any key that does not reference data for any study potentially points to data derived from all studies in the database, including the affected study, and its associated cache should be evicted. Because not every key without study identifiers necessarily points to study related data, this rule is overly broad. At the moment of this writing we were unable to implement reliable methods that would further specify such keys. This might be a start-off point for future optimizations."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["Study view page is fully responsive, it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. 
Higher priority will promote chart closer to the left-top.","In order to improve the layout, we added a layout algorithm layer. The study view page is using grid layout. All charts will be put into 2-dimensional systems. For example, pie chart, by default, takes 1 block and bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in(authenticated) users, charts layout is saved to users profile i.e, whenever user tries to re-visits the same url, previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to few charts, but as long as you assign a priority in the database except than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. The higher the final (numeric) score, the higher priority assigned.","This is combination of DFS_MONTH and DFS_STATUS","This is combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1.(Currently disables charts for single clinical attributes only)","To promote certain chart in study view, please increase priority in the database to a certain number. The higher the score, the higher priority it will be displayed in the study view. 
If you want to hide chart, please set the priority to 0. For combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["Study view page is fully responsive, it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. Higher priority will promote chart closer to the left-top.","In order to improve the layout, we added a layout algorithm layer. The study view page is using grid layout. All charts will be put into 2-dimensional systems. For example, pie chart, by default, takes 1 block and bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in(authenticated) users, charts layout is saved to users profile i.e, whenever user tries to re-visits the same url, previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to few charts, but as long as you assign a priority in the database except than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. 
The higher the final (numeric) score, the higher priority assigned.","This is combination of DFS_MONTH and DFS_STATUS","This is combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1.(Currently disables charts for single clinical attributes only)","To promote certain chart in study view, please increase priority in the database to a certain number. The higher the score, the higher priority it will be displayed in the study view. If you want to hide chart, please set the priority to 0. For combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Introduction","p":["Using OncoKB does not require a data access token. But the instance you are connecting to only includes biological information by default. If you want to include tumor type summary, therapeutic levels and more, please consider obtaining a license from OncoKB."]},{"l":"How to obtain an OncoKB license","p":["Please review OncoKB terms","Please request for data access","You can find your token in your Account Settings after login."]},{"l":"Set up cBioPortal to include full OncoKB content","p":["Following properties can be edited in the portal.properties file or set in system variables if you are using docker.","show.oncokb should be set to true","oncokb.token should be set to a valid OncoKB access token value","oncokb.public_api.url should be set to https://www.oncokb.org/api/v1","Thank you for supporting future OncoKB development."]},{"l":"Include MSI-H and TMB-H annotation","p":["If you want to include the MSI-H and TMB-H annotation on patient view, please follow the instruction to import required clinical data. For MSI-H, a clinical attribute MSI_TYPE with value Instable is required. 
For TMB-H, a clinical attribute TMB_SCORE with value >=10 is required."]},{"l":"Disable OncoKB Service","p":["Please set show.oncokb to false in portal.properties or in system variables if you are using docker."]}],[{"l":"Data Loading"},{"l":"Introduction","p":["This page is the starting point for data loading. The General Overview section below contains all the required steps to get you started."]},{"l":"General Overview","p":["Getting your study data into cBioPortal requires four steps:","Setting up the validator","Preparing your study data","Validating your study data","Loading your study data"]},{"l":"Setting up the validator"},{"l":"Installation","p":["If you have a git clone of cBioPortal, the relevant scripts can be found in the folder: your_cbioportal_dir/core/src/main/scripts/importer"]},{"l":"Dependencies","p":["The scripts run in Python 3.4 or newer, and they require the modules requests and pyyaml. You can use this command to install those modules:","If you want the scripts to be able to generate html reports (recommended way for reading the validation errors, if any), then you will also need to install Jinja2. You can use this command:"]},{"l":"Preparing Study Data","p":["A study to be loaded in cBioPortal can basically consist of a directory where all the data files are located. Each data file needs a meta file that refers to it and both files need to comply to the format required for the specific data type. The format and fields expected for each file are documented in the File Formats page. Below is an example of the files in such a directory."]},{"l":"Rules","p":["There are just a few rules to follow:","meta_study, meta_clinical and respective clinical data file are the only mandatory files.","cancer type files can be mandatory if the study is referring to a cancer type that does not yet exist in the DB.","meta files can be named anything, as long as it starts or ends with name 'meta'. E.g. 
meta_test, meta.test, test.meta are all fine; metal_test and metastudy are wrong.","data files can be named anything and are referenced by a property data_filename set in the meta file."]},{"l":"Validating your study data","p":["Once all files are in place and follow the proper format, you can validate your files using the dataset validator script.","The validation can be run standalone, but it is also integrated into the metaImport script, which validates the data and then loads it if validation succeeds."]},{"l":"Loading Data","p":["To load the data into cBioPortal, the metaImport script has to be used. This script first validates the data and, if validation succeeds, loads the data."]},{"l":"Removing a Study","p":["To remove a study, the cbioportalImporter script can be used."]},{"l":"Example studies","p":["Examples for the different types of data are available on the File Formats page. The Provisional TCGA studies, downloadable from the Data Sets section are complete studies that can be used as reference when creating data files."]}],[{"l":"Downloads","p":["This page describes the various files available for download. The first section is targeted towards users of cBioPortal. The second section towards maintainers of cBioPortal instances."]},{"l":"User Downloads","p":["There are several ways in which one can download data from cBioPortal including manual and programmatic approaches. See options outlined below."]},{"l":"Datasets Page","p":["A zip file for each study on cbioportal.org can be download from the Datasets Page. One can also use the R client cBioPortalData to programmatically download all of these files."]},{"l":"Datahub","p":["The files for each study are also available from our datahub repository. This is basically the extracted version of the zip files in the Datasets Page. 
Note that this is a git LFS repo so if you are familiar with git you might prefer using this option."]},{"l":"API and API Clients","p":["Besides downloading all the study data one can also request slices of the data using the API. A slice of the data could e.g. be \"give me all the mutation data for one patient\" or \"get me all EGFR mutations for a particular group of samples\". There are API clients available in a variety of languages including bash, R and Python. See the API documentation for more information."]},{"l":"Instance Maintainer Downloads","p":["As an instance maintainer of cBioPortal there are a variety of files that might be helpful. See below."]},{"l":"Study staging files","p":["Staging files for the studies on cbioportal.org can be downloaded from the Datasets Page. These studies can be validated and loaded in a local cBioPortal instance using the validator and importer. Any issues with a downloaded study can be reported on cBioPortal DataHub.","Example studies","TCGA Provisional studies often contain many different data types. These are excellent examples to use as reference when creating your own staging files. A detailed description of supported data types can be found in the File Formats documentation."]},{"l":"Complete cBioPortal database","p":["A MySQL database dump of the complete cbioportal.org database can be found here: http://download.cbioportal.org/mysql-snapshots/public-portal-dump.latest.sql.gz"]},{"l":"Seed Database","p":["The seed database is a MySQL dump for seeding a new instance of the cBioPortal. Instructions for loading the seed database can be found here. The seed database for human can be downloaded from cBioPortal Datahub. A mouse version can be found here."]}],[{"l":"Using the dataset validator","p":["To facilitate the loading of new studies into its database, cBioPortal provides a set of staging file formats for the various data types. 
To validate your files you can use the dataset validator script."]},{"l":"Running the validator","p":["To run the validator first go to the importer folder cbioportal_source_folder/core/src/main/scripts/importer and then run the following command:","This will tell you the parameters you can use:","For more information on the --portal_info_dir option, see Offline validation below. If your cBioPortal is not using hg19, you have to specify the reference_genome field in your meta_study.txt.","For more information, see Validation of non-human data.","When running the validator with parameter -r the validator will run the validation of the clinical data it will ignore all failing checks about values in the headers of the clinical data file.","When running the validator with parameter -m the validator will run the validation of the specific MAF file checks for the mutation file in strict maf check mode. This means that when the validator encounters these validation checks it will report them as an error instead of a warning."]},{"i":"example-1-test-study_es_0","l":"Example 1: test study_es_0","p":["As an example, you can try the validator with one of the test studies found in cbioportal_source_folder/core/src/test/scripts/test_data. Example, assuming port 8080 and using -v option to also see the progress:","Results in:","When using the -html option, a report will be generated, which looks like this for the previous example: Screenshot of a successful validation report"]},{"i":"example-2-test-study_es_1","l":"Example 2: test study_es_1","p":["More test studies for trying the validator ( study_es_1 and study_es_3) are available in cbioportal_source_folder/core/src/test/scripts/test_data. Example, assuming port 8080 and using -v option:","Results in:","And respective HTML report: Screenshot of an unsuccessful validation report"]},{"l":"Offline validation","p":["The validation script can be used offline, without connecting to a cBioPortal server. 
The tests that depend on information specific to the portal (which clinical attributes and cancer types have been previously defined, and which Entrez gene identifiers and corresponding symbols are supported), will instead be read from a folder with .json files generated from the portal."]},{"i":"example-3-validation-with-a-portal-info-folder","l":"Example 3: validation with a portal info folder","p":["To run the validator with a folder of portal information files, add the -p/--portal_info_dir option to the command line, followed by the path to the folder:"]},{"i":"example-4-generating-the-portal-info-folder","l":"Example 4: generating the portal info folder","p":["The portal information files can be generated on the server, using the dumpPortalInfo script. Go to cbioportal_source_folder/core/src/main/scripts, make sure the environment variables $JAVA_HOME and $PORTAL_HOME are set, and run dumpPortalInfo.pl with the name of the directory you want to create:"]},{"i":"example-5-validating-without-portal-specific-information","l":"Example 5: validating without portal-specific information","p":["Alternatively, you can run the validation script with the -n/--no_portal_checks flag to entirely skip checks relating to installation-specific metadata. Be warned that files succeeding this validation may still fail to load (correctly)."]},{"l":"Validation of non-human data","p":["When importing a study with a reference genome other than hg19/GRCh37, this should be specified in the meta_study.txt file, next to the reference_genome field. Supported values are hg19, hg38 and mm10.","cBioPortal is gradually introducing support for mouse. 
If you want to load mouse studies, you have to set up your database for mouse.","As an example, the command for the mouse example using the three parameters is given:"]},{"l":"Running the validator for multiple studies","p":["The importer folder cbioportal_source_folder/core/src/main/scripts/importer also contains a script for running the validator for multiple studies:","The following parameters can be used:","Parameters --url_server, --portal_info_dir, --no_portal_checks and --portal_properties are equal to the parameters with the same name in validateData.py. The script will save a log file with validation output (log-validate-studies.txt) and output the validation status from the input studies:"]},{"i":"example-1-root-directory-parameter","l":"Example 1: Root directory parameter","p":["Validation can be run for all studies in a certain directory by using the --root-directory parameter. The script will append each folder in the root directory to the study list to validate:"]},{"i":"example-2-list-of-studies-parameter","l":"Example 2: List of studies parameter","p":["Validation can also be run for specific studies by using the --list-of-studies parameter. The paths to the different studies can be defined and separated by a comma:"]},{"i":"example-3-combination-root-directory-and-list-of-studies-parameter","l":"Example 3: Combination root directory and list of studies parameter","p":["Validation can also be run on specific studies in a certain directory by combining the --root-directory and --list-of-studies parameters:"]},{"i":"example-4-html-folder-parameter","l":"Example 4: HTML folder parameter","p":["When HTML validation reports are desired, an output folder for these HTML files can be specified. This folder does not have to exist; the script can create the folder. The HTML validation reports will get the following name: study_name-validation.html. 
To create HTML validation reports for each study the --html-folder parameter needs to be defined:"]}],[{"l":"Using the metaImport script"},{"l":"Importing Data into cBioPortal","p":["The metaImport script should be used to automate the process of validating and loading datasets. It also has some nice features like an extra option to only load datasets that completely pass validation (i.e. with no errors, while warnings can be explicitly allowed by the user)."]},{"l":"Running the metaImport Script","p":["To run the metaImport script first go to the importer folder your_cbioportal_dir/core/src/main/scripts/importer and then run the following command:","This will tell you the parameters you can use:"]},{"l":"Example of Importing a study","p":["Export PORTAL_HOME as explained here, e.g.","and then run (this simple command only works if your cBioPortal is running at http://localhost/cbioportal - if this is not the case, follow the advanced example):"]},{"l":"Advanced Example","p":["This example imports the study to the localhost, creates an html report and shows status messages.","By adding -o, warnings will be overridden and import will start after validation."]},{"i":"development--debugging-mode","l":"Development / debugging mode","p":["For developers and specific testing purposes, an extra script, cbioportalImporter.py, is available which imports data regardless of validation results. 
Check this page for more information on how to use it."]}],[{"i":"#","p":["Arm Level CNA Data","Cancer Study","Cancer Type","Case Lists","Clinical Data","Continuous Copy Number Data","Custom namespace columns","Discrete Copy Number Data","Expression Data","Formats","Fusion Data(DEPRECATED)","Gene Panel Data","Gene Set Data","Generic Assay","GISTIC 2.0 Data","Introduction","Methylation Data","Mutation Data","Mutational Signature Data","Mutsig Data","Protein level Data","Resource Data","Segmented Data","Structural Variant Data","Study Tags file","Timeline Data"]},{"l":"Introduction","p":["This page describes the file formats that cancer study data should assume in order to be successfully imported into the database. Unless otherwise noted, all data files are in tabular-TSV (tab separated value) format and have an associated metadata file which is in a multiline record format. The metadata and data files should follow a few rules documented at the Data Loading page."]},{"l":"Formats"},{"l":"Cancer Study","p":["As described in the Data Loading tool page, the following file is needed to describe the cancer study:"]},{"l":"Meta file","p":["This file contains metadata about the cancer study. The file contains the following fields:","type_of_cancer: The cancer type abbreviation, e.g., \"brca\". This should be the same cancer type as specified in the meta_cancer_type.txt file, if available. The type can be \"mixed\" for studies with multiple cancer types.","cancer_study_identifier: A string used to uniquely identify this cancer study within the database, e.g., \"brca_joneslab_2013\".","name: The name of the cancer study, e.g., \"Breast Cancer (Jones Lab 2013)\".","description: A description of the cancer study, e.g., \"Comprehensive profiling of 103 breast cancer samples. Generated by the Jones Lab 2013\". 
This description may contain one or more URLs to relevant information.","citation (Optional): A relevant citation, e.g., \"TCGA, Nature 2012\".","pmid (Optional): One or more relevant pubmed ids (comma separated without whitespace). If used, the field citation has to be filled, too.","groups (Optional): When using an authenticating cBioPortal, lists the user-groups that are allowed access to this study. Multiple groups are separated with a semicolon \";\". The study will be invisible to users not in at least one of the listed groups, as if it wasn't loaded at all. e.g., \"PUBLIC;GDAC;SU2C-PI3K\". see User-Authorization for more information on groups","add_global_case_list (Optional): set to 'true' if you would like the \"All samples\" case list to be generated automatically for you. See also Case lists.","tags_file (Optional): the file name containing custom study tags for the study tags.","reference_genome (Optional): the study reference genome (e.g. hg19, hg38). Without specifying this property, the study will be assigned to the reference genome specified in portal.properties(property ucsc.build)."]},{"l":"Example","p":["An example meta_study.txt file would be:"]},{"l":"Cancer Type","p":["If the type_of_cancer specified in the meta_study.txt does not yet exist in the type_of_cancer database table, a meta_cancer_type.txt file is also mandatory."]},{"i":"meta-file-1","l":"Meta file","p":["The file is comprised of the following fields:","genetic_alteration_type: CANCER_TYPE","datatype: CANCER_TYPE","data_filename: your datafile"]},{"i":"example-1","l":"Example","p":["An example meta_cancer_type.txt file would be:"]},{"l":"Data file","p":["The file is comprised of the following columns in the order specified:","type_of_cancer: The cancer type abbreviation, e.g., \"brca\".","name: The name of the cancer type, e.g., \"Breast Invasive Carcinoma\".","dedicated_color: CSS color name of the color associated with this cancer study, e.g., \"HotPink\". 
See this list for supported names, and follow the awareness ribbons color schema. This color is associated with the cancer study on various web pages within the cBioPortal.","parent_type_of_cancer: The type_of_cancer field of the cancer type of which this is a subtype, e.g., \"Breast\". ℹ️ : you can set parent to tissue, which is the reserved word to place the given cancer type at \"root\" level in the \"studies oncotree\" that will be generated in the homepage (aka query page) of the portal."]},{"i":"example-2","l":"Example","p":["An example record would be:"]},{"l":"Clinical Data","p":["The clinical data is used to capture both clinical attributes and the mapping between patient and sample ids. The software supports multiple samples per patient.","As of March 2016, the clinical file is split into a patient clinical file and a sample clinical file. The sample file is required, whereas the patient file is optional. cBioPortal has specific functionality for a core set of patient and sample columns, but can also display custom columns (see section \"Custom columns in clinical data\")."]},{"l":"Meta files","p":["The two clinical metadata files (or just one metadata file if you choose to leave the patient file out) have to contain the following fields:","cancer_study_identifier: same value specified in meta_study.txt","genetic_alteration_type: CLINICAL","datatype: PATIENT_ATTRIBUTES or SAMPLE_ATTRIBUTES","data_filename: your datafile"]},{"l":"Examples","p":["An example metadata file, e.g. named meta_clinical_sample.txt, would be:","An example metadata file, e.g. named meta_clinical_patient.txt, would be:"]},{"l":"Data files","p":["For both patients and samples, the clinical data file is a two dimensional matrix with multiple clinical attributes. 
When the attributes are defined in the patient file they are considered to be patient attributes; when they are defined in the sample file they are considered to be sample attributes.","The first four rows of the clinical data file contain tab-delimited metadata about the clinical attributes. These rows have to start with a '#' symbol. Each of these four rows contains a different type of information regarding each of the attributes that are defined in the fifth row:","Row 1: The attribute Display Names: The display name for each clinical attribute","Row 2: The attribute Descriptions: Long(er) description of each clinical attribute","Row 3: The attribute Datatype: The datatype of each clinical attribute (must be one of: STRING, NUMBER, BOOLEAN)","Row 4: The attribute Priority: A number which indicates the importance of each attribute. In the future, higher priority attributes will appear in more prominent places than lower priority ones on relevant pages (such as the Study View). A higher number indicates a higher priority.","Please note: Priority is not the sole factor determining which chart will be displayed first. A layout algorithm in study view also makes a minor adjustment on the layout. The algorithm tries to fit all charts into a 2 by 2 matrix (Mutated Genes Table occupies 2 by 2 space). When a chart cannot be fitted in the first matrix, a second matrix will be generated. And the second matrix will have lower priority than the first one. 
If a later chart can fit into the first matrix, then its priority will be promoted.","Please see here for more detailed information about how study view utilizes priority and how the layout is calculated based on priority.","Row 5: The attribute name for the database: This name should be in upper case.","Row 6: This is the first row that contains actual data."]},{"l":"Example clinical header","p":["Below is an example of the first 4 rows with the respective metadata for the attributes defined in the 5th row."]},{"l":"Clinical patient columns","p":["AGE: Age at which the condition or disease was first diagnosed, in years (number)","Custom attributes:","Custom Clinical Attribute Headers: Any other custom attribute can be added as well. See section \"Custom columns in clinical data\".","DFS_MONTHS: Disease free (months) since initial treatment","DFS_STATUS: Disease free status since initial treatment","GENDER or SEX: Gender or sex of the patient (string)","In the patient view, 0:DiseaseFree creates a green label, 1:Recurred/Progressed a red label.","In the patient view, 0:LIVING creates a green label, 1:DECEASED a red label.","Note on survival plots: to generate the survival plots successfully, the columns are required to be in pairs, which means the file should have a pair of columns that have the same prefix but ending with _STATUS and _MONTHS respectively. For example, PFS_STATUS and PFS_MONTHS are a valid pair of columns that can generate the survival plots.","Note on survival status value: the value of survival status must be prefixed with 0: or 1:. A value with prefix 0: means that no event occurred (e.g. LIVING, DiseaseFree). A value with prefix 1: means that an event occurred (e.g. DECEASED, Recurred/Progressed).","OS_MONTHS: Overall survival in months since initial diagnosis","OS_STATUS: Overall patient survival status","PATIENT_DISPLAY_NAME: Patient display name (string)","PATIENT_ID (required): a unique patient ID. 
This field allows only numbers, letters, points, underscores and hyphens.","Possible values: 0:DiseaseFree, 1:Recurred/Progressed","Possible values: 1:DECEASED, 0:LIVING","The file containing the patient attributes has one required column:","The following columns are used by the study view as well as the patient view. In the study view they are used to create the survival plots. In the patient view they are used to add information to the header.","These columns, when provided, add additional information to the patient description in the header:","TUMOR_SITE"]},{"l":"Example patient data file"},{"l":"Clinical sample columns","p":["By adding PATIENT_ID here, cBioPortal will map the given sample to this patient. This enables one to associate multiple samples to one patient. For example, a single patient may have had multiple biopsies, each of which has been genomically profiled. See this example for a patient with multiple samples.","CANCER_TYPE_DETAILED: Cancer Type Detailed, a sub-type of the specified CANCER_TYPE","CANCER_TYPE: Cancer Type","Custom attributes:","Custom Clinical Attribute Headers: Any other custom attribute can be added as well. See section \"Custom columns in clinical data\".","If set to metastatic or metastasis: red","If set to primary or otherwise: black","If set to recurrence, recurred, progression or progressed: orange","METASTATIC_SITE or PRIMARY_SITE: Override TUMOR_SITE (patient level attribute) depending on sample type","OTHER_SAMPLE_ID: OTHER_SAMPLE_ID is no longer supported. Please replace this column header with SAMPLE_ID.","PATIENT_ID (required): A patient ID. This field can only contain numbers, letters, points, underscores and hyphens.","SAMPLE_CLASS","SAMPLE_DISPLAY_NAME: displayed in addition to the ID","SAMPLE_ID (required): A sample ID. 
This field can only contain numbers, letters, points, underscores and hyphens.","SAMPLE_TYPE, TUMOR_TISSUE_SITE or TUMOR_TYPE: gives sample icon in the timeline a color.","The file containing the sample attributes has two required columns:","The following columns additionally affect the Timeline data visualization:","The following columns affect the header of the patient view by adding text to the samples in the header:","The following columns are required for the pan-cancer summary statistics tab ( example)."]},{"l":"Example sample data file"},{"l":"Columns with specific functionality","p":["These columns can be in either the patient or sample file.","CANCER_TYPE: Overrides study wide cancer type","CANCER_TYPE_DETAILED","KNOWN_MOLECULAR_CLASSIFIER","GLEASON_SCORE: Radical prostatectomy Gleason score for prostate cancer","HISTOLOGY","TUMOR_STAGE_2009","TUMOR_GRADE","ETS_RAF_SPINK1_STATUS","TMPRSS2_ERG_FUSION_STATUS","ERG_FUSION_ACGH","SERUM_PSA","DRIVER_MUTATIONS"]},{"l":"Custom columns in clinical data","p":["cBioPortal supports custom columns with clinical data in either the patient or sample file. They should follow the previously described 5-row header format. Be sure to provide the correct Datatype, for optimal search, sorting, filtering (in clinical data tab) and visualization.","The Clinical Data Dictionary from MSKCC is used to normalize clinical data, and should be followed to make the clinical data comparable between studies. This dictionary provides a definition whether an attribute should be defined on the patient or sample level, as well as provides a name, description and datatype. The data curator can choose to ignore these proposed definitions, but not following this dictionary might make comparing data between studies more difficult. It should however not break any cBioPortal functionality. 
See GET /api/ at https://oncotree.mskcc.org/cdd/swagger-ui.html#/ for the data dictionary of all known clinical attributes."]},{"l":"Banned column names","p":["MUTATION_COUNT and FRACTION_GENOME_ALTERED are auto populated clinical attributes, and should therefore not be present in clinical data files."]},{"l":"Discrete Copy Number Data","p":["The discrete copy number data file contain values that would be derived from copy-number analysis algorithms like GISTIC 2.0 or RAE. GISTIC 2.0 can be installed or run online using the GISTIC 2.0 module on GenePattern. For some help on using GISTIC 2.0, check the Data Loading: Tips and Best Practices page. When loading case list data, the _cna case list is required. See the case list section."]},{"l":"Wide vs Long format","p":["For CNA data two formats are supported: the wide, and the long format:","Wide format: a matrix, where each row is a gene, and each column is a sample","Long format: not a matrix, each row is a gene-sample combination; this makes the file longer"]},{"l":"Wide format"},{"i":"meta-file-2","l":"Meta file","p":["The meta file is comprised of the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: DISCRETE","stable_id: gistic, cna, cna_rae or cna_consensus","show_profile_in_analysis_tab: true","profile_name: A name for the discrete copy number data, e.g., \"Putative copy-number alterations from GISTIC\"","profile_description: A description of the copy number data, e.g., \"Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.\"","data_filename: your datafile","gene_panel (Optional): gene panel stable id","pd_annotations_filename (Optional): name of custom driver annotations file"]},{"i":"example-3","l":"Example","p":["An example metadata file could be named meta_cna.txt and its contents could be:"]},{"i":"data-file-1","l":"Data file","p":["For each gene (row) in the data file, the following columns are required in the order specified:","One or both of:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: An Entrez Gene identifier.","And:","An additional column for each sample in the dataset using the sample id as the column header.","For each gene-sample combination, a copy number level is specified:","\"-2\" is a deep loss, possibly a homozygous deletion","\"-1\" is a single-copy loss (heterozygous deletion)","\"0\" is diploid","\"1\" indicates a low-level gain","\"2\" is a high-level amplification."]},{"i":"example-4","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Long format"},{"i":"meta-file-3","l":"Meta file","p":["The meta file of long format is comprised of the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: DISCRETE_LONG Note: It will end up as datatype DISCRETE in the database, because the LONG data format is only relevant while importing.","stable_id: gistic, cna, cna_rae or cna_consensus","show_profile_in_analysis_tab: true","profile_name: A name for the discrete copy number data, e.g., \"Putative copy-number alterations from GISTIC\"","profile_description: A description of the copy number data, e.g., \"Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.\"","data_filename: your datafile","gene_panel (Optional): gene panel stable id","namespaces (Optional): Comma-delimited list of namespaces to import."]},{"i":"example-5","l":"Example","p":["An example metadata file could be named meta_cna.txt and its contents could be:"]},{"i":"data-file-2","l":"Data file","p":["Each row contains a gene-sample combination. Custom driver annotations are added as columns to the data file, just like custom namespace columns."]},{"i":"example-6","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Adding your own discrete copy number columns","p":["Additional columns can be added to the discrete copy number long data file. In this way, the portal will parse and store your own CNA fields in the database.","See Custom namespace columns for more information on adding custom columns to data files."]},{"l":"Custom driver annotations file","p":["Custom driver annotations can be defined for discrete copy number data. These annotations can be used to complement or replace default driver annotation resources OncoKB and HotSpots. Custom driver annotations can be placed in a separate file that is referenced by the pd_annotations_filename field of the meta file. The annotation file can hold the following columns:","Hugo_Symbol (Optional): A HUGO gene symbol. Required when column Entrez_Gene_Id is not present.","Entrez_Gene_Id (Optional): An Entrez Gene identifier. Required when column Hugo_Symbol is not present.","SAMPLE_ID: A sample ID. This field can only contain numbers, letters, points, underscores and hyphens.","cbp_driver (Optional): \"Putative_Driver\", \"Putative_Passenger\", \"Unknown\", \"NA\" or \"\" (empty value). 
This field must be present if the cbp_driver_annotation is also present in the MAF file.","cbp_driver_annotation (Optional): Description field for the cbp_driver value (limited to 80 characters). This field must be present if the cbp_driver is also present in the MAF file. This field is free text. Example values for this field are: \"Pathogenic\" or \"VUS\".","cbp_driver_tiers (Optional): Free label/category that marks the mutation as a putative driver such as \"Driver\", \"Highly actionable\", \"Potential drug target\". . This field must be present if the cbp_driver_tiers_annotation is also present in the MAF file. In the OncoPrint view's Mutation Color dropdown menu, these tiers are ordered alphabetically. This field is free text and limited to 20 characters. For mutations without a custom annotation, leave the field blank or type \"NA\".","cbp_driver_tiers_annotation (Optional): Description field for the cbp_driver_tiers value (limited to 80 characters). This field must be present if the cbp_driver_tiers is also present in the MAF file. This field can not be present when the cbp_driver_tiers field is not present.","All genes referenced in the custom driver annotation file must be present in the data file for discrete copy number alterations.","The cbp_driver column flags the mutation as either driver or passenger. In cBioPortal, passenger mutations are also known as variants of unknown significance (VUS). The cbp_driver_tiers column assigns an annotation tier to the mutation, such as \"Driver\", \"Highly actionable\" or \"Potential drug target\". When a tier is selected, mutations with that annotation are highlighted as driver. Both types of custom annotations contain a second column with the suffix _annotation, to add a description. This is displayed in the tooltip that appears when hovering over the sample's custom annotation icon in the OncoPrint view.","You can learn more about configuring these annotations in the portal.properties documentation. 
When properly configured, the customized annotations appear in the \"Mutation Color\" menu of the OncoPrint view: screenshot mutation color menu"]},{"i":"example-7","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"i":"gistic-20-format","l":"GISTIC 2.0 Format","p":["GISTIC 2.0 outputs a tabular file similarly formatted to the cBioPortal format, called prefix_all_thresholded.by_genes.txt. In this file the gene symbol is found in the Gene Symbol column, while Entrez gene IDs are in the Gene ID or Locus ID column. Please rename Gene Symbol to Hugo_Symbol and Gene ID or Locus ID to Entrez_Gene_Id. The Cytoband column can be kept in the table, but note that these values are ignored in cBioPortal. cBioPortal uses cytoband annotations from the map_location column in NCBI's Homo_sapiens.gene_info.gz when loading genes into the seed database."]},{"l":"Continuous Copy Number Data"},{"i":"meta-file-4","l":"Meta file","p":["The continuous copy number metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION.","datatype: CONTINUOUS","stable_id: linear_CNA","show_profile_in_analysis_tab: false.","profile_name: A name for the copy number data, e.g., \"copy-number values\".","profile_description: A description of the copy number data, e.g., \"copy-number values for each gene (from Affymetrix SNP6).\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id","cBioPortal also supports log2 copy number data. If your data is in log2, change the following fields:","datatype: LOG2-VALUE","stable_id: log2CNA"]},{"i":"example-8","l":"Example","p":["An example metadata file, e.g. meta_log2_cna.txt, would be:"]},{"i":"data-file-3","l":"Data file","p":["The log2 copy number data file follows the same format as expression data files. 
See Expression Data for a description of the expression data file format."]},{"i":"gistic-20-format-1","l":"GISTIC 2.0 Format","p":["GISTIC 2.0 outputs a tabular file similarly formatted to the cBioPortal format, called prefix_all_data_by_genes.txt. In this file the gene symbol is found in the Gene Symbol column, while Entrez gene IDs are in the Gene ID or Locus ID column. Please rename Gene Symbol to Hugo_Symbol and Gene ID or Locus ID to Entrez_Gene_Id. The Cytoband column can be kept in the table, but note that these values are ignored in cBioPortal. cBioPortal uses cytoband annotations from the map_location column in NCBI's Homo_sapiens.gene_info.gz when loading genes into the seed database."]},{"l":"Segmented Data","p":["A SEG file (segmented data; .seg or .cbs) is a tab-delimited text file that lists loci and associated numeric values. The segmented data file format is the output of the Circular Binary Segmentation algorithm (Olshen et al., 2004). This Segment data enables the 'CNA' lane in the Genomic overview of the Patient view (as can be seen in this example)."]},{"i":"meta-file-5","l":"Meta file","p":["The segmented metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: SEG","reference_genome_id: Reference genome version. Supported values: \"hg19\"","description: A description of the segmented data, e.g., \"Segment data for the XYZ cancer study.\".","data_filename: your datafile"]},{"i":"example-9","l":"Example:","p":["An example metadata file, e.g. meta_cna_hg19_seg.txt, would be:"]},{"i":"data-file-4","l":"Data file","p":["The first row contains column headings and each subsequent row contains a locus and an associated numeric value. 
See also the Broad IGV page on this format."]},{"i":"example-10","l":"Example:","p":["An example data file which includes the required column header would look like:"]},{"l":"Expression Data","p":["An expression data file is a two dimensional matrix with a gene per row and a sample per column. For each gene-sample pair, a real number represents the gene expression in that sample."]},{"i":"meta-file-6","l":"Meta file","p":["The expression metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MRNA_EXPRESSION","datatype: CONTINUOUS, DISCRETE or Z-SCORE","stable_id: see table below.","source_stable_id: Required when both conditions are true: (1) datatype= Z-SCORE and (2) this study contains GSVA data. Should contain stable_id of the expression file for which this Z-SCORE file is the statistic.","show_profile_in_analysis_tab: false (you can set to true if Z-SCORE to enable it in the oncoprint, for example).","profile_name: A name for the expression data, e.g., \"mRNA expression (microarray)\".","profile_description: A description of the expression data, e.g., \"Expression levels (Agilent microarray).\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"l":"Supported stable_id values for MRNA_EXPRESSION","p":["For historical reasons, cBioPortal expects the stable_id to be one of those listed in the following static set. The stable_id for continuous RNA-seq data has two options: rna_seq_mrna or rna_seq_v2_mrna. These options were added to distinguish between two different TCGA pipelines, which perform different types of normalization (RPKM and RSEM). However, for custom datasets either one of these stable_id can be chosen."]},{"i":"example-11","l":"Example","p":["An example metadata, e.g. 
meta_expression.txt file would be:"]},{"i":"data-file-5","l":"Data file","p":["For each gene (row) in the data file, the following columns are required in the order specified:","One or both of:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: A Entrez Gene identifier.","And:","An additional column for each sample in the dataset using the sample id as the column header.","For each gene-sample combination, a value is specified:","A real number for each sample id (column) in the dataset, representing the expression value for the gene in the respective sample.","or NA for when the expression value for the gene in the respective sample could not (or was not) be measured (or detected)."]},{"l":"z-score instructions","p":["For mRNA expression data, we typically expect the relative expression of an individual gene and tumor to the gene's expression distribution in a reference population. That reference population is either all tumors that are diploid for the gene in question, or, when available, normal adjacent tissue. The returned value indicates the number of standard deviations away from the mean of expression in the reference population (Z-score). This measure is useful to determine whether a gene is up- or down-regulated relative to the normal samples or all other tumor samples. Note, the importer tool can create normalized (z-score) expression data on your behalf. Please visit the Z-Score normalization script wiki page for more information. 
A corresponding z-score metadata file would be something like:"]},{"i":"examples-of-data-files","l":"Examples of data files:","p":["An example data file which includes the required column header and leaves out Hugo_Symbol (recommended) would look like:","An example data file which includes both Hugo_Symbol and Entrez_Gene_Id would look like (supported, but not recommended as it increases the chances of errors regarding ambiguous gene symbols):","An example data file with only Hugo_Symbol column (supported, but not recommended as it increases the chances of errors regarding ambiguous gene symbols):"]},{"l":"Mutation Data","p":["When loading mutation data, the _sequenced case list is required. See the case list section."]},{"i":"meta-file-7","l":"Meta file","p":["The mutation metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MUTATION_EXTENDED","datatype: MAF","stable_id: mutations","show_profile_in_analysis_tab: true","profile_name: A name for the mutation data, e.g., \"Mutations\".","profile_description: A description of the mutation data, e.g., \"Mutation data from whole exome sequencing.\".","data_filename: your data file","gene_panel (optional): gene panel stable id. See Gene panels for mutation data.","swissprot_identifier (optional): accession or name, indicating the type of identifier in the SWISSPROT column","variant_classification_filter (optional): List of Variant_Classifications values to be filtered out.","namespaces (optional): Comma-delimited list of namespaces to import."]},{"l":"Gene panels for mutation data","p":["Using the gene_panel property it is possible to annotate all samples in the MAF file as being profiled on the same specified gene panel.","Please use the Gene Panel Matrix file when:","Data contains samples that are profiled but no mutations are called. 
Also please add these to the _sequenced case list.","Multiple gene panels are used to profile the samples in the MAF file."]},{"l":"Variant classification filter","p":["The variant_classification_filter field can be used to filter out specific mutations. This field should contain a comma separated list of Variant_Classification values. By default, cBioPortal filters out Silent, Intron, IGR, 3'UTR, 5'UTR, 3'Flank and 5'Flank, except for the promoter mutations of the TERT gene. For no filtering, include this field in the metadata file, but leave it empty. For cBioPortal default filtering, do not include this field in the metadata file. Allowed values to filter out (mainly from Mutation Annotation Format page): Frame_Shift_Del, Frame_Shift_Ins, In_Frame_Del, In_Frame_Ins, Missense_Mutation, Nonsense_Mutation, Silent, Splice_Site, Translation_Start_Site, Nonstop_Mutation, 3'UTR, 3'Flank, 5'UTR, 5'Flank, IGR, Intron, RNA, Targeted_Region, De_novo_Start_InFrame, De_novo_Start_OutOfFrame, Splice_Region and Unknown"]},{"l":"Tumor seq allele ambiguity","p":["Bugs may exist in MAF data that make it ambiguous as to whether Tumor_Seq_Allele1 or Tumor_Seq_Allele2 should be seen as the variant allele to be used when a new mutation record is created and imported in cBioPortal. In such cases, preference is given to the tumor seq allele value that matches a valid nucleotide pattern ^[ATGC]*$ versus a null or empty value, or \"-\". For example, given Reference_Allele= \"G\", Tumor_Seq_Allele= \"-\", and Tumor_Seq_Allele2= \"A\", preference will be given to Tumor_Seq_Allele2. 
Using this same example with Tumor_Seq_Allele1= \"T\", preference will be given to Tumor_Seq_Allele1 if it does not match Reference_Allele, which in this case it does not.","When curating MAF data, it is best practice to leave Tumor_Seq_Allele1 empty if this information is not provided in your data source to avoid this ambiguity."]},{"l":"Namespaces","p":["The namespaces field can be used to specify additional MAF columns for import. This field should contain a comma separated list of namespaces. Namespaces can be identified as prefixes to an arbitrary set of additional MAF columns (separated with a period e.g ASCN.total_copy_number, ASCN.minor_copy_number). All columns with a prefix matching a namespace specified in the metafile will be imported; columns with an unspecified namespace will be ignored. If no additional columns beyond the required set need to be imported, the field should be left blank."]},{"i":"example-12","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-6","l":"Data file","p":["The cBioPortal mutation data file extends the Mutation Annotation Format(MAF) created as part of The Cancer Genome Atlas (TCGA) project, by adding extra annotations to each mutation record. This section describes:","How to create the cBioPortal mutation data file with a minimal MAF file using the Genome Nexus Annotation Pipeline.","The description of the cBioPortal mutation data file. You can also get the cBioPortal mutation data file from vcf using: vcf2maf."]},{"l":"Create the cBioPortal mutation data file with Genome Nexus with a minimal MAF file"},{"l":"Minimal MAF file format","p":["A minimal mutation annotations file can contain just the five genomic change columns plus one sample identifier column. 
From this minimal MAF, it is possible to create the cBioPortal mutation data file by running it through the Genome Nexus Annotation Pipeline.","Chromosome (Required): A chromosome number, e.g., \"7\".","Start_Position (Required): Start position of event.","End_Position (Required): End position of event.","Reference_Allele (Required): The plus strand reference allele at this position.","Tumor_Seq_Allele2 (Required): Primary data genotype.","Tumor_Sample_Barcode (Required): This is the sample ID. Either a TCGA barcode (patient identifier will be extracted), or for non-TCGA data, a literal SAMPLE_ID as listed in the clinical data file.","In addition to the above columns, it is recommended to have the read counts to calculate variant allele frequencies:","t_alt_count (Optional, but recommended): Variant allele count (tumor).","t_ref_count (Optional, but recommended): Reference allele count (tumor).","The following extra annotation columns are important for making sure mutation specific UI functionality works well in the portal:","Protein_position (Optional): (annotation column) Required to initialize the 3D viewer in mutations view","SWISSPROT (Optional): (annotation column) UniProtKB/SWISS-PROT name (formerly called ID) or accession code depending on the value of the swissprot_identifier metadatum, e.g. O11H1_HUMAN or Q8NG94. Is not required, but not having it may result in inconsistent PDB structure matching in mutations view."]},{"l":"Creating the cBioPortal mutation data file","p":["Once you have a minimal MAF you can run it through the Genome Nexus Annotation Pipeline. This tool annotates variants against the Genome Nexus Server, which in turn leverages Ensembl Variant Effect Predictor (VEP) and selects a single effect per variant. 
Protein identifiers will be mapped to UniProt canonical isoforms (see also this mapping file)."]},{"l":"cBioPortal mutation data file format","p":["1 column with the amino acid change.","1 These columns are currently not shown in the Mutation tab and Patient view.","32 columns from the TCGA MAF format.","4 columns with information on reference and variant allele counts in tumor and normal samples.","BAM_File1 (Optional): Not used.","Center (Optional): The sequencing center.","Chromosome (Required): A chromosome number, e.g., \"7\".","dbSNP_RS1 (Optional): Latest dbSNP rs ID.","dbSNP_Val_Status1 (Optional): dbSNP validation status.","End_Position (Optional, but recommended for additional features such as Cancer Hotspots annotations): End position of event.","Entrez_Gene_Id (Optional, but recommended): A Entrez Gene identifier.","HGVSp_Short (Required): Amino Acid Change, e.g. p.V600E.","Hugo_Symbol (Required): A HUGO gene symbol.","Match_Norm_Seq_Allele1 (Optional): Primary data.","Match_Norm_Seq_Allele2 (Optional): Primary data.","Match_Norm_Validation_Allele11 (Optional): Secondary data from orthogonal technology.","Match_Norm_Validation_Allele21 (Optional): Secondary data from orthogonal technology.","Matched_Norm_Sample_Barcode1 (Optional): The sample ID for the matched normal sample.","Mutation_Status (Optional): \"Somatic\" or \"Germline\" are supported by the UI in Mutations tab. \"None\", \"LOH\" and \"Wildtype\" will not be loaded. Other values will be displayed as text.","n_alt_count (Optional): Variant allele count (normal).","n_ref_count (Optional): Reference allele count (normal).","NCBI_Build (Required)1: The Genome Reference Consortium Build is used by a variant calling software. 
It must be \"GRCh37\" or \"GRCh38\" for a human, and \"GRCm38\" for a mouse.","Reference_Allele (Required): The plus strand reference allele at this position.","Score1 (Optional): Not used.","Sequence_Source1 (Optional): Molecular assay type used to produce the analytes used for sequencing.","Sequencer1 (Optional): Instrument used to produce primary data.","Sequencing_Phase1 (Optional): Indicates current sequencing phase.","Start_Position (Optional, but recommended for additional features such as Cancer Hotspots annotations): Start position of event.","Strand (Optional): We assume that the mutation is reported for the + strand.","t_alt_count (Optional): Variant allele count (tumor).","t_ref_count (Optional): Reference allele count (tumor).","The cBioPortal mutation data file format recognized by the portal has:","Tumor_Sample_Barcode (Required): This is the sample ID. Either a TCGA barcode (patient identifier will be extracted), or for non-TCGA data, a literal SAMPLE_ID as listed in the clinical data file.","Tumor_Seq_Allele1 (Optional): Primary data genotype.","Tumor_Seq_Allele2 (Required): Primary data genotype.","Tumor_Validation_Allele1 (Optional): Secondary data from orthogonal technology.","Tumor_Validation_Allele2 (Optional): Secondary data from orthogonal technology.","Validation_Method1 (Optional): The assay platforms used for the validation call.","Validation_Status (Optional): Second pass results from orthogonal technology. \"Valid\", \"Invalid\", \"Untested\", \"Inconclusive\", \"Redacted\", \"Unknown\" or \"NA\".","Variant_Classification (Required): Translational effect of variant allele, e.g. Missense_Mutation, Silent, etc.","Variant_Type 1(Optional): Variant Type, e.g. SNP, DNP, etc.","Verification_Status1 (Optional): Second pass results from independent attempt using same methods as primary data source. 
\"Verified\", \"Unknown\" or \"NA\"."]},{"l":"Custom driver annotations","p":["It is possible to manually add columns for defining custom driver annotations. These annotations can be used to complement or replace default driver annotation resources OncoKB and HotSpots.","cbp_driver (Optional): \"Putative_Driver\", \"Putative_Passenger\", \"Unknown\", \"NA\" or \"\" (empty value). This field must be present if the cbp_driver_annotation is also present in the MAF file.","cbp_driver_annotation (Optional): Description field for the cbp_driver value (limited to 80 characters). This field must be present if the cbp_driver is also present in the MAF file. This field is free text. Example values for this field are: \"Pathogenic\" or \"VUS\".","cbp_driver_tiers (Optional): Free label/category that marks the mutation as a putative driver such as \"Driver\", \"Highly actionable\", \"Potential drug target\". . This field must be present if the cbp_driver_tiers_annotation is also present in the MAF file. In the OncoPrint view's Mutation Color dropdown menu, these tiers are ordered alphabetically. This field is free text and limited to 20 characters. For mutations without a custom annotation, leave the field blank or type \"NA\".","cbp_driver_tiers_annotation (Optional): Description field for the cbp_driver_tiers value (limited to 80 characters). This field must be present if the cbp_driver_tiers is also present in the MAF file. This field can not be present when the cbp_driver_tiers field is not present.","The cbp_driver column flags the mutation as either driver or passenger. In cBioPortal, passenger mutations are also known as variants of unknown significance (VUS). The cbp_driver_tiers column assigns an annotation tier to the mutation, such as \"Driver\", \"Highly actionable\" or \"Potential drug target\". When a tier is selected, mutations with that annotation are highlighted as driver. 
Both types of custom annotations contain a second column with the suffix _annotation, to add a description. This is displayed in the tooltip that appears when hovering over the sample's custom annotation icon in the OncoPrint view.","You can learn more about configuring these annotations in the portal.properties documentation. When properly configured, the customized annotations appear in the \"Mutation Color\" menu of the OncoPrint view: schreenshot mutation color menu"]},{"l":"Adding your own mutation annotation columns","p":["Additional mutation annotation columns can be added to the cBioPortal mutation data file. In this way, the portal will parse and store your own MAF fields in the database. For example, mutation data that you find on cBioPortal.org comes from MAF files that have been further enriched with information from mutationassessor.org, which leads to a \"Mutation Assessor\" column in the mutation table.","See Custom namespace columns for more information on adding custom columns to data files."]},{"i":"allele-specific-copy-number-ascn-annotations","l":"Allele specific copy number (ASCN) annotations","p":["Allele specific copy number (ASCN) annotation is also supported and may be added using namespaces, described here. If ASCN data is present in the cBioPortal mutation data file, the deployed cBioPortal instance will display additional columns in the mutation table showing ASCN data.","The ASCN columns below are optional by default. 
If ascn is a defined namespace in meta_mutations_extended.txt, then these columns are ALL required.","ASCN.ASCN_METHOD (Optional): Method used to obtain ASCN data e.g \"FACETS\".","ASCN.CCF_EXPECTED_COPIES (Optional): Cancer-cell fraction if mutation exists on major allele.","ASCN.CCF_EXPECTED_COPIES_UPPER (Optional): Upper error for CCF estimate.","ASCN.EXPECTED_ALT_COPIES (Optional): Estimated number of copies harboring mutant allele.","ASCN.CLONAL (Optional): \"Clonal\", \"Subclonal\", or \"Indeterminate\".","ASCN.TOTAL_COPY_NUMBER (Optional): Total copy number of the gene.","ASCN.MINOR_COPY_NUMBER (Optional): Copy number of the minor allele.","ASCN.ASCN_INTEGER_COPY_NUMER (Optional): Absolute integer copy-number estimate."]},{"l":"Example cBioPortal mutation data file","p":["An example cBioPortal mutation data file can be found in the cBioPortal test study study_es_0."]},{"l":"Filtered mutations","p":["A special case for Entrez_Gene_Id=0 and Hugo_Symbol=Unknown: when this combination is given, the record is parsed in the same way as Variant_Classification=IGR and therefore filtered out."]},{"l":"Methylation Data","p":["The Portal expects a single value for each gene in each sample, usually a beta-value from the Infinium methylation array platform."]},{"i":"meta-file-8","l":"Meta file","p":["The methylation metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: METHYLATION","datatype: CONTINUOUS","stable_id: \"methylation_hm27\" or \"methylation_hm450\" (depending on platform).","show_profile_in_analysis_tab: false","profile_name: A name for the methylation data, e.g., \"Methlytation (HM27)\".","profile_description: A description of the methlytation data, e.g., \"Methylation beta-values (HM27 platform). 
For genes with multiple methylation probes, the probe least correlated with expression is selected.\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"i":"example-13","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-7","l":"Data file","p":["The methylation data file follows the same format as expression data files. See Expression Data for a description of the expression data file format. The Portal expects a single value for each gene in each sample, usually a beta-value from the Infinium methylation array platform."]},{"l":"Protein level Data","p":["Protein expression measured by reverse-phase protein array or mass spectrometry. Antibody-sample pairs, with a real number representing the protein level for that sample."]},{"i":"meta-file-9","l":"Meta file","p":["The protein level metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: PROTEIN_LEVEL","datatype: LOG2-VALUE or Z-SCORE","stable_id: rppa, rppa_Zscores, protein_quantification or protein_quantification_zscores","show_profile_in_analysis_tab: false ( true for Z-SCORE datatype)","profile_name: A name for the RPPA data, e.g., \"RPPA data\".","profile_description: A description of the RPPA data, e.g., \"RPPA levels.\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id","An example metadata file would be:","NB: You also need a Z-SCORE file if you want protein levels to be available in query UI and in Oncoprint visualization. E.g.:"]},{"i":"data-file-8","l":"Data file","p":["A protein level data file is a two dimensional matrix with a RPPA antibody per row and a sample per column. For each antibody-sample pair, a real number represents the protein level for that sample. 
The antibody information can contain one or more HUGO gene symbols and/or entrez gene identifiers, separated by a space, and an antibody ID pair separated by the \"|\" symbol."]},{"i":"example-14","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Structural Variant Data","p":["The cBioPortal can load all kinds of structural variant data but at the moment only a subset of them, fusions, are displayed."]},{"i":"meta-file-10","l":"Meta file","p":["The structural variant metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: STRUCTURAL_VARIANT","datatype: SV","stable_id: structural_variants","show_profile_in_analysis_tab: true.","profile_name: A name for the fusion data, e.g., \"Structural Variants\".","profile_description: A description of the structural variant data.","data_filename: your datafile (e.g. data_sv.txt)","gene_panel (Optional): gene panel stable id","An example metadata file would be:"]},{"i":"data-file-9","l":"Data file","p":["0","2","2078","21","3032067","39","39842043","3to5","3to5 or 5to3 or 5to5 or 3to3","4","41","42874110","45556","5_Prime_UTR,3_Prime_UTR,Promoter,Exon,Intron","55","63","7113","93891","A structural variant data file is a tab-delimited file with one structural variant per row. For each structural variant (row) in the data file, the following tab-delimited values are required:","Allowed Values","Annotation","Antisense fusion, Deletion within transcript: mid-exon, Duplication of 1 exon: in frame","Any comments/free text.","As defined in the clinical sample file.","Breakpoint_Type","Chromosome of Gene 1.(strongly recommended field)","Chromosome of Gene 2.","Class","Comments","Connection_Type","Deletion","Deletion,Duplication,Insertion,Inversion,Translocation.","Description of the event. For a fusion event, fill in Fusion. It is a free text.","Description of this event at site 1. 
This could be the location of 1st breakpoint in case of a fusion event","Description of this event at site 2. This could be the location of the 2nd breakpoint in case of a fusion event.","DNA_Support","Ensembl transcript ID of gene 1.","Ensembl transcript ID of gene 2.","ENST00000288319","ENST00000398585","Entrez Gene identifier of gene 1. (strongly recommended field)","Entrez Gene identifier of gene 2.","ERG","Event_Info","Example Values","Exon","Field","For an example see datahub. For an example see datahub. At a minimum Sample_Id, either Site1_Hugo_Symbol/ Site1_Entrez_Gene_Id or Site2_Hugo_Symbol/ Site2_Entrez_Gene_Id and SV_Status are required. For the stuctural variant tab visualization (still in development) one needs to provide those field as well as Site1_Ensembl_Transcript_Id, Site2_Ensembl_Transcript_Id, Site1_Region and Site2_Region. Some of the other columns are shown at several other pages on the website. The Class, Annotation and Event_Info columns are shown prominently on several locations. Note: We strongly recommend all the data providers to submit genomic locations in addition to required fields for future visualization features.","Free Text","Free text description of the gene or transcript rearrangement.","Fusion detected from DNA sequence data, \"Yes\" or \"No\".","Genomic position of breakpoint of Gene 1.(strongly recommended field)","Genomic position of breakpoint of Gene 2.","GRCh37","GRCh37,GRCh38","HUGO gene symbol of gene 1. One might call this the left site(3’) as well. (strongly recommended field)","HUGO gene symbol of gene 2.One might call this the right site(5’) as well.","In_frame, Out-of-frame,Frameshift","Intron of ERG(-): 6Kb after exon 1","Intron of TMPRSS2(-): 511bp before exon 3","Length of the structural variant in number of bases.","NCBI_Build","Normal_Paired_End_Read_Count","Normal_Read_Count","Normal_Split_Read_Count","Normal_Variant_Count","Number of Site 1 region e.g. 
exon 2.(strongly recommended field)","Number of Site 2 region e.g. exon 4.","Out-of-frame","PRECISE","PRECISE or IMPRECISE which explain the resolution. Fill in PRECISE if the breakpoint resolution is known down to the exact base pair.","PRECISE/IMPRECISE","Protein fusion: out of frame (TMPRSS2-ERG)","q22.2","q22.3","RNA_Support","Sample_1","Sample_Id (Required)","Site1_Chromosome","Site1_Contig","Site1_Description","Site1_Ensembl_Transcript_Id","Site1_Entrez_Gene_Id","Site1_Hugo_Symbol","Site1_Position","Site1_Region","Site1_Region_Number","Site2_Chromosome","Site2_Contig","Site2_Description","Site2_Effect_On_Frame","Site2_Ensembl_Transcript_Id","Site2_Entrez_Gene_Id","Site2_Hugo_Symbol","Site2_Position","Site2_Region","Site2_Region_Number","SOMATIC","SOMATIC or GERMLINE","SV_Length","SV_Status (Required)","The contig of Site 1.(strongly recommended field)","The contig of Site 2.","The effect on frame reading in gene 2. Frame_Shift or InFrame,but it is a free text.","The NCBI assembly. Only one assembly per study can be used, see study metadata.","The number of paired-end reads of the normal tissue that support the call.","The number of paired-end reads of the tumor tissue that support the call. 
[Tumor Paired End Read Count is the same as “Spanning Fragments”.]","The number of reads of the normal tissue that have the variant/allele.","The number of reads of the tumor tissue that have the variant/allele.","The number of split reads of the normal tissue that support the call.","The number of split reads of the tumor tissue that support the call.[Tumor Split Read Count is the same as “Junction Reads”.]","The total number of reads of the normal tissue.","The total number of reads of the tumor tissue.","TMPRSS2","TMPRSS2 (NM_001135099) - ERG (NM_001243428) fusion (TMPRSS2 exons 1-2 fused with ERG exons 4-11):(c.126+879:TMRPSS2_c.40-63033:ERGdel)","Tumor_Paired_End_Read_Count","Tumor_Read_Count","Tumor_Split_Read_Count","Tumor_Variant_Count","We advise using one of these {5_PRIME_UTR,3_PRIME_UTR,PROMOTER,EXON,INTRON},but it is a free text.","We advise using one of these {5_Prime_UTR,3_Prime_UTR,Promoter,Exon,Intron},but it is a free text. (strongly recommended field)","We advise using one of these terms [DELETION, DUPLICATION, INSERTION, INVERSION or TRANSLOCATION], but it is free text.","Which direction the connection is made (3' to 5', 5' to 3', etc)","Yes","Yes or No"]},{"l":"Adding your own structural variant columns","p":["Additional mutation annotation columns can be added to the structural variant data file. 
In this way, the portal will parse and store your own structural variant fields in the database.","See Custom namespace columns for more information on adding custom columns to data files."]},{"l":"Fusion Data","p":["⚠️ DEPRECATED Use the: SV format instead"]},{"i":"meta-file-11","l":"Meta file","p":["The fusion metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: FUSION","datatype: FUSION","stable_id: fusion","show_profile_in_analysis_tab: true.","profile_name: A name for the fusion data, e.g., \"Fusions\".","profile_description: A description of the fusion data.","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"i":"example-15","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-10","l":"Data file","p":["A fusion data file is a two dimensional matrix with one gene per row. For each gene (row) in the data file, the following tab-delimited values are required in the order specified:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: A Entrez Gene identifier.","Center: The sequencing center.","Tumor_Sample_Barcode: This is the sample ID.","Fusion: A description of the fusion, e.g., \"TMPRSS2-ERG fusion\".","DNA_support: Fusion detected from DNA sequence data, \"yes\" or \"no\".","RNA_support: Fusion detected from RNA sequence data, \"yes\" or \"no\".","Method: Fusion detected algorithm/tool.","Frame: \"in-frame\" or \"frameshift\".","Fusion_Status (OPTIONAL): An assessment of the mutation type (i.e., \"SOMATIC\", \"GERMLINE\", \"UNKNOWN\", or empty)","Note: If a fusion event includes a gene, e.g., Hugo_Symbol or Entrez_Gene_Id, that is not profiled, the event will be filter out during import into the database.","An example data file which includes the required column header would look like:"]},{"l":"Case Lists","p":["Case lists are used to define sample lists that can be selected on the query page. 
Some case lists have specific functionality, but it's also possible to add custom case lists. The case list files should be placed in a sub-directory called case_lists which exists alongside all the other cancer study data. The case list file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","stable_id: it must contain the cancer_study_identifier followed by an underscore. Typically, after this a relevant suffix, e.g., _custom, is added. There are some naming rules to follow if you want the case list to be selected automatically in the query UI base on the selected sample profiles. See subsection below.","case_list_name: A name for the patient list, e.g., \"All Tumors\".","case_list_description: A description of the patient list, e.g., \"All tumor samples (825 samples).\".","case_list_ids: A tab-delimited list of sample ids from the dataset.","case_list_category: Optional alternative way of linking your case list to a specific molecular profile. E.g. setting this to all_cases_with_cna_data will signal to the portal that this is the list of samples to be associated with CNA data in some of the analysis."]},{"i":"example-16","l":"Example","p":["An example case list file would be:"]},{"l":"Case list stable id suffixes","p":["In order for sample counts to propagate to the data sets widget on the home page and the table on the Data Sets page, the following case list suffixes need to be used in the stable_id property (e.g. brca_tcga_pub_sequenced). This is also needed for correct statistics in the Study view page when calculating the frequency of CNA and of mutations per gene in the respective summary tables.","Sequenced: _sequenced. When only a mutation profile is selected on the query page, this is the default case list. Also used in the Study Summary to calculate the proportion of samples with mutations.","CNA: _cna. When only a CNA profile is selected on the query page, this is the default case list. 
Also used in the Study Summary to calculate the proportion of samples with CNA.","Sequenced and CNA: _cnaseq. When a mutation and CNA genetic profile are selected on the query page, this is the default case list.","mRNA (microarray): _mrna. When only a mRNA (microarray) profile is selected on the query page, this is the default case list.","mRNA (RNA-Seq): _rna_seq_mrna. When only a mRNA (RNA-Seq) profile is selected on the query page, this is the default case list.","mRNA (RNA-SeqV2): _rna_seq_v2_mrna. When only a mRNA (RNA-SeqV2) profile is selected on the query page, this is the default case list.","mRNA normal: _normal_mrna. Used for the datasets page to calculate the number of normal samples.","mRNA normal: _microrna. Used for the datasets page to calculate the number of microRNA samples.","Methylation (HM27): _methylation_hm27.","RPPA: _rppa. When only a RPPA profile is selected on the query page, this is the default case list.","Sequenced, CNA and mRNA: _3way_complete When a mutation, CNA and mRNA profile are selected on the query page, this is the default case list.","SV: _sv. When a structural variant profile is selected on the query page, this is the default case list. Also used in the Study Summary to calculate the proportion of samples with fusions.","All: _all. If you are not using add_global_case_list attribute in Study metadata, you need to add this case list."]},{"l":"Required case lists","p":["Some case lists are required:","_all. This can be generated by the importer if you set the attribute add_global_case_list to true in the Study metadata.","_sequenced. This case list is required when loading mutation data.","_cna. 
This case list is required when loading discrete cna data."]},{"l":"Case list categories","p":["These are the valid case lists categories for case_list_category: in the meta file.","all_cases_in_study","all_cases_with_mutation_data","all_cases_with_cna_data","all_cases_with_log2_cna_data","all_cases_with_methylation_data","all_cases_with_mrna_array_data","all_cases_with_mrna_rnaseq_data","all_cases_with_rppa_data","all_cases_with_microrna_data","all_cases_with_mutation_and_cna_data","all_cases_with_mutation_and_cna_and_mrna_data","all_cases_with_gsva_data","all_cases_with_sv_data","other"]},{"l":"Timeline Data","p":["The timeline data is a representation of the various events that occur during the course of treatment for a patient from initial diagnosis. In cBioPortal timeline data is represented as one or more tracks in the patient view. Each main track is based on an event type, such as \"Specimen\", \"Imaging\", \"Lab_test\", etc.","Attention: some clinical attributes affect the timeline visualization. Please check the Clinical Data section for more information.","This type data is not yet being validated. It can, however, be uploaded."]},{"i":"meta-file-12","l":"Meta file","p":["Each event type requires its own meta file. A timeline meta file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: CLINICAL","datatype: TIMELINE","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-11","l":"Data file","p":["And optional columns with special effects:","Each event type requires its own data file, which contains all the events that each patient undergoes. The data format used for timeline data is extremely flexible. There are four required columns:","EVENT_TYPE: the category of the event. You are free to define any type of event here. 
For several event types cBioPortal has column naming suggestions and for several events there are column names which have special effects. See event types for more information.","Example column:","HYPERLINK_OPEN_IN_NEWTAB: [Link text Here](https://link-url-here.org/path:blank)","link-in-timeline-data","PATIENT_ID: the patient ID from the dataset","SOME_HYPERLINK: [Link text Here](https://link-url-here.org)","SPECIMEN_REFERENCE_NUMBER: SPECIMEN_REFERENCE_NUMBER is no longer supported. Please replace this column header with SAMPLE_ID.","START_DATE: the start point of any event, calculated in * days from the date of diagnosis (which will act as point zero on the timeline scale)","STOP_DATE: The end date of the event is calculated in days from the date of diagnosis (which will act as point zero on the timeline scale). If the event occurs over time (e.g. a Treatment, ...) the STOP_DATE column should have values. If the event occurs at a time point (e.g. a Lab_test, Imaging, ...) the STOP_DATE is still mandatory, but the values should be blanks.","STYLE_COLOR: when this column has a hexadecimal color value (e.g. #ffffff), it will be used as the color for rendering this event.","STYLE_SHAPE: when this column has a valid value, this event will be rendered using that shape. The valid shapes are circle, square, triangle, diamond, star, and camera.","The external link can be opened in a new tab, instead of an IFRAME within the same window/tab. 
To do this, the string :blank is to be added as a suffix at the end of the URL.","To embed hyperlinks in custom columns:","Values in custom columns can include markdown for hyperlinks that will show up in event tooltips, allowing users to click through to external resources associated with events."]},{"l":"Event Types","p":["AGENT_CLASS: This allows you to classify your agents into useful groups.","AGENT: for medical therapies, the agent is defined with number of cycles if applicable and for radiation therapy, the agent is defined as standard dose given to the patient during the course.","As previously mentioned, the EVENT_TYPE can be anything. However, several event types have columns with special effects. Furthermore, for some event types cBioPortal has column naming suggestions.","Based on different cancer types you can add additional data here.","DIAGNOSTIC_TYPE_DETAILED: Detailed description of the event type.","DIAGNOSTIC_TYPE: This attribute will cover the different diagnostics tools used (for example: MRI, CT scan etc.)","EVENT_TYPE: IMAGING","EVENT_TYPE: LAB_TEST","EVENT_TYPE: SPECIMEN","EVENT_TYPE: STATUS","EVENT_TYPE: TREATMENT","RESULT: corresponding value of the test","RESULT: Results of the diagnostic tests","SOURCE: Where the status was monitored.","SOURCE: Where was the Imaging done.","SOURCE: Where was the specimen collection done.","Special: all dots in the IMAGING track are squares.","Special: When using the AGENT and SUBTYPE columns, each agent and subtype will be split into its own track.","Special: When using the TEST and RESULT columns, each test gets its own track. 
Any TEST that has only numerical RESULT values will be rendered as a line chart.","SPECIMEN_SITE: This is the site from where the specimen was collected.","SPECIMEN_TYPE: This can either be tissue or blood.","STATUS: If the EVENT_TYPE is status, data is entered under STATUS to define either the best response from the treatment or if there is a diagnosis of any stage progression etc.","SUBTYPE: Depending upon the TREATMENT_TYPE, this can either be Chemotherapy, Hormone Therapy, Targeted Therapy etc. (for Medical Therapies) or WPRT, IVRT etc. (for Radiation Therapies).","Suggested columns","TEST: type of test performed","TREATMENT_TYPE: This can be either Medical Therapy or Radiation Therapy."]},{"l":"Clinical Track Ordering","p":["Clinical tracks are ordered as follows (if available):","Specimen","Surgery","Status","Diagnostics","Diagnostic","Imaging","Lab_test","Treatment","First custom event","etc."]},{"i":"example-17","l":"Example","p":["An example timeline file for SPECIMEN would be:","Assuming the sample identifiers were also defined in the clinical file, this will lead to a timeline track with numbered specimen samples.","An example timeline file for Lab_test would be:","This will lead to a timeline track for Lab_test with an additional subtrack specifically for PSA. PSA's events will be sized based on the result."]},{"i":"gistic-20-data","l":"GISTIC 2.0 Data","p":["Running GISTIC 2.0 on e.g. GenePattern not only provides the Discrete Copy Number Data, but also provides an amp_genes and a del_genes file. These cannot be directly imported into cBioPortal, but first have to be converted to a different file format. 
An example can be found in the ACC TCGA study on cBioPortal Datahub.","After uploading a gistic_amp and/or gistic_del file, significantly recurrently copy-number altered genes will be labeled and available for query in the \"CNA Genes\" table on the study view, like in the TCGA Legacy / Firehose data set for bladder cancer:","recurrently-altered-genes-table"]},{"i":"meta-file-13","l":"Meta file","p":["The Gistic metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: GISTIC_GENES_AMP or GISTIC_GENES_DEL","datatype: Q-VALUE","reference_genome_id: reference genome version. Supported values: \"hg19\"","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-12","l":"Data file","p":["The following fields from the generated Gistic file are used by the cBioPortal importer:","chromosome: chromosome on which the region was found, without the chr prefix","peak_start: start coordinate of the region of maximal amplification or deletion within the significant region","peak_end: end coordinate of the region of maximal amplification or deletion within the significant region","genes_in_region: comma-separated list of HUGO gene symbols in the 'wide peak' (allowing for single-sample errors in the peak boundaries)","amp: 1 for amp, 0 for del","cytoband: cytogenetic band specification of the region, including chromosome (Giemsa stain)","q_value: the q-value of the peak region"]},{"i":"example-18","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"MutSig Data","p":["MutSig stands for \"Mutation Significance\". MutSig analyzes lists of mutations discovered in DNA sequencing, to identify genes that were mutated more often than expected by chance given background mutation processes. 
You can download mutsig from broadinstitute(MutSigCV 1.4 is available) or run mutsig (MutSigCV 1.2 is available) using GenePattern.","Note: The tcga files that are uploaded to cBioPortal are generated using MutSig2.0. This version is not available outside broadinstitute.","The MutSigCV 1.2 output is different from the MutSig2.0 header. TODO: test the 1.4 version. Requires > 10GB of memory","After uploading a MutSig file, significantly recurrently mutated genes will be labeled and available for query in the \"Mutated Genes\" table on the study view, like in the TCGA Legacy / Firehose data set for bladder cancer:","recurrently-mutated-genes-table","This type data is not yet being validated. It can, however, be uploaded."]},{"i":"meta-file-14","l":"Meta file","p":["The MutSig metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MUTSIG","datatype: Q-VALUE","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-13","l":"Data file","p":["The following fields from a MutSig file are used by the cBioPortal importer:","rank","gene: this is the HUGO symbol","N (or Nnon): bases covered","n (or nnon): number of mutations","p: result of testing the hypothesis that all of the observed mutations in this gene are a consequence of random background mutation processes, taking into account the list of bases that are successfully interrogated by sequencing (i.e., “covered”) and the list of observed somatic mutations, as well as the length and composition of the gene in addition to the background mutation rates in different sequence contexts ( https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3059829/)","q: p value correct for multiple testing"]},{"i":"example-19","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Gene Panel Data","p":["Gene panel functionality can specify which genes are assayed on a panel and 
assign samples and genetic profiles (such as mutation data) to a panel.","To include gene panel data in your instance, the following data and/or configurations can be used:","Gene panel file: This file contains the genes on the gene panel. A panel can be used for multiple studies within the instance and should be loaded prior to loading a study with gene panel data. For information on the format and import process please visit: Import-Gene-Panels.","Gene panel matrix file: This file is used to specify which samples are sequenced on which gene panel in which genetic profile. This is recommended for mutation and structural variant data, because the MAF and structural variant formats are unable to include samples which are sequenced but contain no called mutations, and only a single gene panel can be defined in the meta file. For other genetic profiles, columns can be added to specify their gene panel, but a property can also be added to their respective meta file, because these data files contain all profiled samples. Although the gene panel matrix functionality overlaps with the case list functionality, a case list for mutations (_sequenced) and Structural variants (_sv) is also required.","Gene panel property in meta file: Adding the gene_panel: property to the meta file of data profile will assign all samples from that profile to the gene panel. In this case it is not necessary to include a column for this profile in the gene panel matrix file."]},{"l":"Gene Panel Matrix file"},{"l":"Columns and rows","p":["The gene panel matrix file contains a list of samples in the first column, and an additional column for each profile in the study using the stable_id as the column header. These stable_id's should match the ones in their respective meta files, for example mutations for mutation data and gistic for discrete CNA data. Columns should be separated by tabs. 
Fusion events are saved in the mutation table in the cBioPortal database, so they should be included in the mutations column. As described above, genetic profiles other than mutation and fusion data profiles can use the gene_panel: meta property if all samples are profiled on the same gene panel."]},{"l":"Values","p":["For each sample-profile combination, a gene panel should be specified. Please make sure this gene panel is imported before loading the study data. When the sample is not profiled on a gene panel, or if the sample is not profiled at all, use NA as value. If the sample is profiled for mutations, make sure it is also in the _sequenced case list."]},{"i":"example-20","l":"Example","p":["An example file would look like this:","SAMPLE_ID","mutations","gistic","SAMPLE_ID_1","IMPACT410","SAMPLE_ID_2","SAMPLE_ID_3","NA"]},{"i":"meta-file-15","l":"Meta file","p":["The gene panel matrix file requires a meta file, which should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: GENE_PANEL_MATRIX","datatype: GENE_PANEL_MATRIX","data_filename: your datafile","Example:"]},{"l":"Gene panel property in meta file","p":["If all samples in a genetic profile have the same gene panel associated with them, an optional field can be specified in the meta data file of that datatype called gene_panel:. If this is present, all samples in this data file will be assigned to this gene panel for this specific profile."]},{"l":"Gene Set Data","p":["A description of importing gene sets (which are required before loading gene set study) can be found here. This page also contains a decription to import gene set hierarchy data, which is required to show a hierarchical tree on the query page to select gene sets.","cBioPortal supports GSVA scores and GSVA-like scores, such as ssGSEA. 
The Gene Set Variation Analysis method in R (GSVA, Hänzelmann, 2013) can calculate several types of scores (specified with the methods= argument) and outputs a score between -1 and 1. The GSVA method also calculates a p-value per score using a bootstrapping method.","To import the GSVA(-like) data, a score and p-value data file are required. It is important that the dimensions of the score and p-value file are the same and that they contain the same gene sets and samples. Both data files require a meta file."]},{"l":"GSVA score meta file","p":["The meta file will be similar to meta files of other genetic profiles, such as mRNA expression. For both GSVA and GSVA-like scores, GSVA-SCORE is used as datatype and gsva_scores is used as stable_id.","Required fields:","Example:"]},{"l":"GSVA score data file","p":["The data file will be a simple tab separated format, similar to the expression data file: each sample is a column, each gene set a row, each cell contains the GSVA score for that sample x gene set combination.","The first column is geneset_id and contains the names of the gene sets. Gene set names should be formatted in uppercase. The other columns are sample columns: An additional column for each sample in the dataset using the sample id as the column header.","The cells contain the GSVA(-like) score: which is real number, between -1.0 and 1.0, representing the score for the gene set in the respective sample, or NA when the score for the gene set in the respective sample could not be (or was not) calculated. 
Example with 2 gene sets and 3 samples:"]},{"l":"GSVA p-value meta file","p":["For both GSVA and GSVA-like p-values, P-VALUE is used as datatype and gsva_pvalues is used as stable_id.","Required fields:","Example:"]},{"l":"GSVA p-value data file","p":["The data file will be a simple tab separated format, similar to the score file: each sample is a column, each gene set a row, each cell contains the p-value for the score found for sample x gene set combination.","The first column is geneset_id and contains the names of the gene sets. Gene set names should be formatted in uppercase. The other columns are sample columns: An additional column for each sample in the dataset using the sample id as the column header.","The cells contain the p-value for the GSVA score: A real number, between 0.0 and 1.0, representing the p-value for the GSVA score calculated for the gene set in the respective sample, or NA when the score for the gene is also NA. Example with 2 gene sets and 3 samples:"]},{"l":"Study Tags file","p":["YAML or JSON file which contains extra information about the cancer study. No compulsory fields are required for this file (free-form). To enable this feature, you need to add a line in the cancer study meta file with tags_file: followed the YAML/JSON file name. The information on the YAML or JSON file will be displayed in a table when mousing over a tag logo in the studies on the query page."]},{"l":"Generic Assay","p":["Generic Assay is a two dimensional matrix generalized to capture non-genetic measurements per sample. Instead of a gene per row and a sample per column, a Generic Assay file contains a generic entity per row and a sample per column. A generic entity is defined by the data curator and generally means something other than a gene. Some examples include, treatment response or mutational signatures. 
For each generic entity - sample pair, a (real number / text / binary value) represents a captured measurement."]},{"l":"Generic Assay meta file","p":["The generic assay metadata file should contain the following fields:","Example:"]},{"l":"Note on stable_id","p":["The stable_id for the generic assay datatype is a user defined field. The only requirement is that the stable_id is unique across all metafiles in the study."]},{"l":"Note on pivot_threshold_value","p":["The pivot_threshold_value is an arbitrary value that specifies a critical boundary that distinguish important from unimportant values in a generic assay profile. This boundary will be used in different visualizations to highlight important observations. Whether smaller or larger values are considered to be important can be controlled with the value_sort_order field (see below).","In a heatmap for a generic assay profile important values are shown in darker shades of blue, whereas unimportant values are shown in darker shades of red. The values represented by the pivot_threshold_value is shown in white. When defined, the pivot_threshold_value will always be included in the legend, even when all datapoints are all more extreme. In the Waterfall plot in Plots tab of Results view pivot_threshold_value determines the boundary between up- and downward deflections in the plot.","When no sensible idea exists for a boundary between important and unimportant observations the pivot_threshold_value field should not be defined (excluded from meta file). In this case all values will be shown as shades of blue in the heatmap and the waterfall plot will show up- and downward deflections around 0."]},{"l":"Note on value_sort_order","p":["The value_sort_order field can be used to indicate whether small or large value are considered to be more important. When value_sort_order is ASC smaller values are considered to be more important. 
When value_sort_order is DESC larger values are considered to be more important.","The default value for value_sort_order is ASC.","The value_sort_order is used by the Oncoprint when aggregating generic assay data in a tooltip that covers multiple samples from a single patient. When value_sort_order is ASC the sample with the smallest response value will be shown for that patient. When value_sort_order is DESC the sample with the largest value will be shown for that patient.","In the heatmap for a generic assay profile in results view data points with smaller values will show as darker blue when the value_sort_order is ASC. When value_sort_order is DESC larger values are assigned a darker blue color.","The value_sort_order is used by the Waterfall plot for orientation of the x-axis so that important observations are shown at the left side of the plot. When value_sort_order is ASC the x-axis will be in ascending order with smaller values to the left. When value_sort_order is DESC the x-axis will be in descending order with larger values to the left."]},{"l":"Note on generic_entity_meta_properties","p":["All meta properties must be specified in the generic_entity_meta_properties field. Every meta property listed here must appear as a column header in the corresponding data file. It's highly recommend to add NAME, DESCRIPTION and an optional URL to get the best visualization on OncoPrint tab and Plots tab."]},{"l":"Note on patient_level","p":["Generic Assay data will be considered sample_level data if the patient_level property is missing or set to false. 
In addition, the patient or sample identifiers need to be included in the Clinical Data file."]},{"l":"Note on Generic Assay genetic_alteration_type and datatype","p":["All generic assay data is registered to be of the type of genetic_alteration_type and data type can choose from LIMIT-VALUE, CATEGORICAL and BINARY.","LIMIT-VALUE: This datatype is intended to be used for any numerical data set with similar structure (entities measured in samples). The LIMIT-VALUE is validated to contain any continuous number optionally prefixed with a '>' or '<' threshold symbol (e.g., '>8.00').","CATEGORICAL (under development): This datatype is intended to be used for any categorical data set with similar structure (entities measured in samples). Any text is allowed in CATEGORICAL.","BINARY (under development): This datatype is intended to be used for any binary data set with similar structure (entities measured in samples). The BINARY is validated to contain only reserved text ( true, false, yes, no).","If the value for the generic entity in the respective sample could not (or was not) be measured (or detected), the value should be 'NA' or leave that cell blank."]},{"l":"Generic Assay data file","p":["The data file will be a simple tab separated format, similar to the expression data file: each sample is a column, each generic entity is a row, each cell contains values for that generic entity x sample combination.","For each generic entity (row) in the data file, the following columns are required in the order specified:","ENTITY_STABLE_ID: Any unique identifier using a combination of alphanumeric characters, _ and -.","And:","An additional column for each generic_entity_meta_properties in the metafile, using the property name as the column header (e.g., 'NAME').","An additional column for each sample in the dataset using the sample id as the column header.","Example with 3 generic entities and 3 samples:"]},{"l":"Arm Level CNA Data","p":["Arm-level copy-number data is a 
predefined subtype of Generic Assay Data.","Allowed values for Arm-level copy-number data are Loss, Gain, and Unchanged, use NA or leave the cell blank to indicate a missing value.","Please find example file format here: Meta file example and Data file example"]},{"l":"Mutational Signature Data","p":["Mutational Signature data is a predefined subtype of Generic Assay Data. Setting generic_assay_type: MUTATIONAL_SIGNATURE in the meta file will make cBioPortal interpret the data as Mutational Signature data."]},{"l":"Mutational Signature meta files","p":["The mutational signature meta files follow the same convention as the Generic Assay Meta file, however there are some key differences:","generic_assay_type should be set to MUTATIONAL_SIGNATURE","datatype should be set to LIMIT-VALUE","stable_id values should end with: _{filetype}_{identifier}, where:","filetype is either contribution, pvalue or counts","identifier is consistent between files belonging to the same analysis","Multiple signatures can be added to a single study, as long as they have different identifiers in their stable id (e.g., contribution_SBS and contribution_DBS)","In generic_entity_meta_properties the NAME value is required. The DESCRIPTION and URL values can be added to display more information and link to external resources in the mutational signatures tab."]},{"l":"Mutational Signature data files","p":["The mutational signature data files follow the same convention as the Generic Assay Data file. Each collection of mutational signatures can consist of up to three different data files, each with an accompanying meta file.","Signature contribution file (required)","Data file containing the contribution of each signature-sample pair. Values are expected to be 0 ≤ x ≤ 1.","Signature pvalue file (optional)","Data file containing p-values for each signature-sample pair. 
Values below 0.05 will be shown as significant.","Mutational counts matrix file (optional)","Data file containing nucleotide changes of a sample. cBioPortal has specific visualization options for single-base substitutions (96 channels), double-base substitutions (72 channels) and insertion/deletions (83 channels), following the signatures defined by COSMIC. But other channels can also be used. Values are expected to be positive integers."]},{"l":"Resource Data","p":["The resource data is used to capture resource data in patients, samples and studies. The resources will be represented by URLs with meta data. The types of resources include:","Files: pdf, txt, png, json, etc.","Web links: non-file links e.g. URLs to other systems","the resource file is split into a resource definition file, sample resource file, patient resource file and study resource file. All data files are required to have a matching meta file."]},{"i":"meta-files-1","l":"Meta files","p":["The resource metadata files have to contain the following fields:","cancer_study_identifier: same value specified in meta_study.txt","resource_type: value from (DEFINITION / SAMPLE / PATIENT / STUDY)","data_filename: your datafile"]},{"i":"examples-1","l":"Examples","p":["An example metadata file, e.g. named meta_resource_definition.txt, would be:","An example metadata file, e.g. named meta_resource_sample.txt, would be:"]},{"i":"data-files-1","l":"Data files"},{"l":"Resource Definition Data File","p":["The resource definition file should follow this format, it has three required columns:","RESOURCE_ID (required): a unique resource ID. 
This field allows only numbers, letters, points, underscores and hyphens.","DISPLAY_NAME (required): a display name for resources.","RESOURCE_TYPE (required): resource type for resources, must be SAMPLE, PATIENT or STUDY.","DESCRIPTION (optional): a description for resources.","OPEN_BY_DEFAULT (optional): define if the resource will be open by default ( true/ false), default is false.","PRIORITY (optional): if not given, will give a default value."]},{"l":"Example Resource Definition data file"},{"l":"Sample Resource Data File","p":["The sample resource file should follow this format, it has four required columns:","PATIENT_ID (required): a unique patient ID. This field allows only numbers, letters, points, underscores and hyphens.","SAMPLE_ID (required): a unique sample ID. This field allows only numbers, letters, points, underscores and hyphens.","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Sample Resource data file"},{"l":"Patient Resource Data File","p":["The patient resource file should follow this format, it has three required columns:","PATIENT_ID (required): a unique patient ID. 
This field allows only numbers, letters, points, underscores and hyphens.","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Patient Resource data file"},{"l":"Study Resource Data File","p":["The study resource file should follow this format, it has two required columns:","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Study Resource data file"},{"l":"Custom namespace columns"},{"l":"Adding annotation columns through namespaces","p":["Custom columns can be added to the data files of mutations, structural variants and discrete copy number (long) data. The columns can be imported through the namespace mechanism into a database table column called ANNOTATION_JSON. Any columns starting with a prefix specified in the namespaces field in the metafile will be imported into the database. Namespace columns should be formatted as the namespace and namespace attribute separated with a period (e.g. ASCN.total_copy_number where ASCN is the namespace and total_copy_number is the attribute).","An example cBioPortal mutation data file with the following additional columns:","imported with the following namespaces field in the metafile:","will import the ASCN.total_copy_number and ASCN.clonal columns into the database. MUTATION.name and MUTATION.type will be ignored because mutation is not specified in the namespaces field."]},{"l":"Representation of namespace columns by mutation API endpoints","p":["Columns added through namespaces will be returned by relevant mutation, discrete copy number and structural variant API endpoints. Namespace data will be available in the namespaceColumns of respective JSON representations of mutation records. 
The namespaceColumns property will be a JSON object where namespace data is keyed by name of the namespace in lowercase. For instance, when namespace ZYGOSITY is defined in the meta file and the data file has column ZYGOSITY.status with value Homozygous for a mutation row, the API will return the following JSON record for this mutation (only relevant fields are shown):","Note: ASCN namespace data is not exported via the namespaceColumns field."]},{"l":"Representation of namespace columns in the cBioPortal frontend","p":["Namespace columns will be added as columns to mutation, structural variant and copy number alteration tables in Patient View and Results View. The case of the namespace in the column header will be as specified in the mutations meta file and the column name will be capitalized.","For instance, this metafile entry:","and this column header:","will show in the mutation table with column name:","Note: namespace columns are recognized by a case-insensitive match of the namespace reported in the meta file and the first word in the column header."]}],[{"l":"Z Score normalization script"},{"l":"Introduction","p":["For some data types, when uploading to cBioPortal, it is currently necessary to also provide a z-score transformed version of your input file. The z-score data is essential for the oncoprint functionality. The oncoprint shows high or low mRNA expression of the genes, based on the threshold the user sets when selecting the genomic profile.","⚠️ Please keep in mind that the z-scores are calculated using only patient data. Hence, 'mRNA High' in this case implies higher expression than the average patient. Also, the source data on which the z-score data is based does not necessarily follow the normal distribution. If your data does not follow the normal distribution, the z-score threshold is less reliable and will result in more false positives or false negatives. 
You can consider log transforming your value before calculating z-scores to improve this. However, even the logged values may not follow the normal distribution, especially if the data is bimodal."]},{"l":"The cBioPortal Z-Score calculation method","p":["cBioPortal currently generates two z-score profiles using two different base populations:","Distribution based on diploid samples only: The expression distribution for unaltered copies of the gene is estimated by calculating the mean and variance of the expression values for samples in which the gene is diploid (i.e. value is \"0\" as reported by discrete CNA data). We call this the unaltered distribution. If the gene has no diploid samples, then its normalized expression is reported as NA.","Distribution based on all samples: The expression distribution of the gene is estimated by calculating the mean and variance of all samples with expression values. If the gene has samples whose expression values are all zeros or non-numeric, then its normalized expression is reported as NA.","Otherwise for every sample, the gene's normalized expression for both the profiles is reported as","where r is the raw expression value, and mu and sigma are the mean and standard deviation of the base population, respectively."]},{"l":"How to proceed","p":["cBioPortal expects z-score normalization to take place per gene. 
You can calculate z-scores with your own preferred method, or use one of the cBioPortal provided approaches:","convertExpressionZscores.pl applies Method 1 (diploid samples as base population)","NormalizeExpressionLevels_allsampleref.py applies Method 2 (all samples as base population)","Examples of the calculation and running the programs are below."]},{"l":"convertExpressionZscores method","p":["Given expression and Copy Number Variation data for a set of samples (patients), generate normalized expression values."]},{"l":"Parameters","p":["copy_number_file expression_file output_file normal_sample_suffix [min_number_of_diploids]","copy_number_file: the discrete copy number (CNA) file","expression_file: the expression (exp) data file.","output_file: the output file to be generated","normal_sample_suffix: use this to identify which of your samples are \"normal\" samples (if any). E.g. normal TCGA samples have a suffix \"-11\". Set it to some dummy value, e.g. \"NONE\", if you have no normal samples in your data."]},{"l":"Algorithm","p":["Input: discrete copy number (CNA) and expression (exp) files"]},{"l":"Example Calculation","p":["Calculate mean and stdev where CNA is 0 (=diploid):","Calculate the z-scores:","Note: this implies that your full dataset does not have average=0, std=1"]},{"l":"Running the script","p":["To run the script type the following commands when in the folder cbioportal_source_folder/core/src/main/scripts:","and then"]},{"i":"example","l":"Example:"},{"l":"NormalizeExpressionLevels_allsampleref method","p":["Given the expression data for a set of samples, generate normalized expression values with the reference population of all samples independent of sample diploid status."]},{"i":"parameters-1","l":"Parameters","p":["expression_file output_file [log_transform] [exclude_zero_negative_values]","expression_file: the expression (exp) data file.","output_file: the output file to be generated.","log_transform: Use this to log transform the data 
before calculating z-scores (optional).","exclude_zero_negative_values: Use this to exclude zero's or negative counts from the reference population when normalizing the data (optional)."]},{"i":"algorithm-1","l":"Algorithm","p":["Input expression data file"]},{"l":"Log-transforming the data","p":["Using the -l option above calculates log base 2 of the expression values.","Here's how we handle the Negative values when log transforming:"]},{"i":"example-calculation-1","l":"Example Calculation:","p":["Log transform and calculate the z-scores (without -e option):"]},{"i":"running-the-script-1","l":"Running the script","p":["To run the script clone the datahub-study-curation-tools from here and type the commands when in the folder zscores/zscores_relative_allsamples:"]},{"i":"example-1","l":"Example:"}],[{"l":"Study Curation Guide","p":["This guide is to help data curators learn how to curate a study on their own computer"]},{"l":"Prerequisites","p":["To follow this guide the curator should have some familiarity with running commands on the command line. We will be using Docker. No Docker knowledge is required, one will obtain some basic understanding by following the guide.","Learn how to setup cBioPortal locally here first."]},{"l":"Load an example study","p":["After having followed the steps in the Docker Deployment instructions, you will end up with the study Low-Grade Gliomas (UCSF, Science 2014) loaded locally. Now let's try to import another study:","Choose another example study from the datahub. Note the name of the folder which is identical to the study id defined in meta_study.txt.","From the root of the cbioportal-docker-compose folder run DATAHUB_STUDIES=my_study_id ./study/init.sh. Change my_study_id to the study you picked in 1. The study should now be downloaded in ./study/.","Import the study by running docker-compose exec cbioportal metaImport.py -u http://cbioportal:8080 -s study/my_study_id/ -o. Again change my_study_id to the study you picked in 1. 
This should import the study.","Restart the cbioportal instance docker-compose restart cbioportal and see if the new study shows up on http://localhost:8080"]},{"l":"Curate a new study","p":["The cBioPortal team has curated many published studies in formats suitable for import in cBioPortal. These can be found on the datahub and can serve as an example of how our curation processes works. You can find a step by step description of how to curate a new study here."]}],[{"i":"data-loading-importing-without-validation-and-deleting-studies","l":"Data loading: Importing without validation and deleting studies","p":["For data curators and developers cbioportalImporter.py is available. This script can import data regardless of validation results. If data format is incorrect, the importer may stop with an error or crash, or leave the database in an inconsistent state.","This script can also be used to delete studies.","Requirements","Importing a study without validation","Deleting a study"]},{"l":"Requirements","p":["This script requires $PORTAL_HOME to point to the folder containing your cBioPortal configuration. 
This can be done with:","The script itself can be found in cbioportal_source_folder/core/src/main/scripts/importer."]},{"l":"Importing a study without validation","p":["To import a study without validation, run:","For example:"]},{"l":"Deleting a study","p":["To remove a study, run:","The meta_study.txt file should contain the study ID in cancer_study_identifier: of the study you would like to remove.","For example:","If you have the Cancer Study Id of the study, or studies you want to remove, you can also use:","Where study1_id is the Cancer Study Id of the study you would like to remove.","You can also remove multiple studies at once by passing the Cancer Study Ids separated by commas:","Where study1_id, study2_id and study3_id are the Cancer Study IDs of the studies you would like to remove."]}],[{"l":"Importing single data files for development","p":["In some cases, for example during development, it may be useful to import a single data file into an existing study. To import one data file at a time, you can use the following command. Note that this process will not validate the data.","This can be done by running cbioportalImporter.py from cbioportal_source_folder/core/src/main/scripts/importer/."]},{"l":"Requirements","p":["This script requires $PORTAL_HOME to point to the folder containing your cBioPortal configuration. This can be done with:"]},{"l":"Workflow","p":["First, if your cancer type does not yet exist, you need to create it:","Next, create the study using","The meta file has to contain the study information.","Now you can import your data file(s):","⚠️ Your first data file should always be the clinical data!","Finally, after you've imported all data, import your case lists:"]},{"i":"example","l":"Example:"}],[{"l":"Data Loading Tips and Best Practices","p":["Here we describe some Tips and Best Practices."]},{"i":"running-gistic-20","l":"Running GISTIC 2.0","p":["To generate discrete copy number data file you may need to run GISTIC 2.0. 
GISTIC 2.0 can be installed or run online using the GISTIC 2.0 module on GenePattern. Running GISTIC 2.0 requires two input files:","A segmentation file, which contains the segmented data","A marker file, which identifies the marker names and positions of the markers in the original dataset (before segmentation).","In some cases the marker file may not be available. You can create one as follows: Using your segmentation file, create a line for each start and end position. E.g. if your seg file contains","In your markerfile this becomes"]},{"l":"Effect of cBioPortal instance on validation","p":["When validating data, you can decide against which server to validate your data with the -u flag. The selected server can have a significant effect on the validation results in the following ways:","Genes may or may not be available on a specific server","Clinical data and its description may vary per server","...","It is advised to use the server where you plan to upload your data as the validation server."]}],[{"l":"Mutation data transcript annotation","p":["This document describes how each mutation in cBioPortal gets annotated with a specific gene symbol + protein change."]},{"l":"Biological Background","p":["This section explains the concepts of protein isoforms and transcripts."]},{"i":"what-is-an-isoform","l":"What is an isoform?","p":["From a single gene (string of nucleotides) multiple protein sequences can be formed (string of amino acids). For example: parts of the gene that code for proteins (exons) can be included or excluded through a process known as alternative splicing. Each of the different resulting proteins is called an isoform. A single mutation can impact the isoforms differently. E.g. in one isoform it might change a P to a T, but in the other isoform that particular exon does not get included and it is therefore not changing the amino acid sequence at all. In cBioPortal for convenience sake we assign a single gene symbol + protein change to each mutation. 
For most cases this works well because there is only one protein isoform relevant in a clinical setting. There are of course exceptions and we are therefore working on improving this representation. An explanation of the relation between transcripts and protein isoforms can be found in the next section."]},{"i":"what-is-a-transcript","l":"What is a transcript?","p":["DNA is transcribed to a pre-mRNA transcript which includes intron and exon regions. Splicing and other processes then take place to form the resulting mature mRNA transcript that only contains exons, which subsequently can be translated to a protein sequence. An mRNA transcript can thus be associated with a specific protein isoform. The Ensembl database assigns ids for these transcript with names like ENSTxxx. You can see this on e.g. the Ensembl website for the BRAF gene:","The transcript ENST00000288602.6 is 2480 base pairs long (nucleotides ACGT) and the associated protein isoform is 766 amino acids (V/P/etc). You can see we are showing that same transcript and protein isoform on cBioPortal:","For each gene name in cBioPortal a canonical/default transcript is assigned. These assignments are stored in Genome Nexus and explained below. Although cBioPortal does not store changes to different transcripts/isoforms for each mutation in the database itself, it does allow viewing them on the Mutations Tab by re-annotating the mutations on the fly through Genome Nexus whenever a user clicks on the transcript dropdown."]},{"l":"Transcript Assignment","p":["The cBioPortal database stores one gene + protein change annotation for each mutation event in the database. To allow comparing mutation data across studies it is important to annotate the mutation data (be it in MAF or VCF format) in the same way, otherwise the gene + protein changes can mean entirely different things. For all public studies stored in datahub we leverage Genome Nexus to do so. 
Genome Nexus assigns one canonical Ensembl Transcript + gene name + protein change for each mutation. You can find the mapping of hugo symbol to transcript id here. There are two sets of default transcripts: uniprot and mskcc. We recommend to use the mskcc set of transcripts when starting from scratch, since these are more up to date and correspond to transcripts that were chosen as relevant for clinical sequencing at MSKCC. The uniprot set of transcripts was constructed several years ago, but we are no longer certain about the logic on how to reconstruct them hence they are not being kept up to date. One can see the differences between the two in this file. For the public cBioPortal (https://www.cbioportal.org) and datahub we are using mskcc, for the GENIE cBioPortal ( https://genie.cbioportal.org) we still use uniprot. As of cBioPortal v5 the default is mskcc for local installations. Prior to v5 it was uniprot. We recommend that people upgrading to v5 consider migrating to mskcc as well (see migration guide and the properties reference docs)."]},{"l":"How default transcript assignment affects the Mutations Tab","p":["The Mutations Tab shows the full protein sequence. The one shown by default is the canonical transcript ( mskcc or uniprot depending on configuration). The mutations are drawn on the lollipop based on the protein position found in the cBioPortal database. For the public cBioPortal all mutation data in MAF format are annotated using Genome Nexus to add the gene and protein change columns. This is then imported into the cBioPortal database. Whether you choose to use the set of uniprot or mskcc transcripts, make sure to indicate it in the [Genome Nexus Annotation Pipeline]( https://github.com/genome-nexus/genome- nexus-annotation-pipeline#maf-annotation)(--isoform-override mskcc or uniprot) when annotating as well as in the properties file of cBioPortal. That way the Mutations Tab will show the correct canonical transcript. 
Note that whenever somebody uses the dropdown on the Mutations Tab to change the displayed transcript, Genome Nexus re-annotates all mutations on the fly. The browser sends over the genomic location (chrom, start, end, ref, alt) to get the protein change information for each transcript. Since many of the annotations are for the canonical transcripts only we are currently hiding annotations for non-canonical transcripts."]},{"l":"Plans for default transcripts","p":["We are planning to move to a single set of default transcripts over time. Prior to v5 uniprot was used for the public facing portals and local installations. Our plan is to use mskcc everywhere and eventually we will most likely move to MANE. MANE is only available for grch38 and since most of our data is for grch37 this is currently not feasible. Whichever set of transcripts you choose to use, make sure to indicate so in the Genome Nexus Annotation Pipeline (--isoform-override mskcc or uniprot) and put the same set of transcripts in the properties file of cBioPortal, such that the Mutations Tab will show the correct canonical transcript (currently defaults to mskcc). The re-annotation of mutations only happens once a user clicks to change the transcript, which is why it's important that the protein change in the database is for the specific transcript displayed first."]}],[{"l":"Import OncoKB annotations as custom driver annotations","p":["The Annotation Configuration menu in Study View and Group Comparison is available only when custom driver annotations are present in the cBioPortal database for the genes in the study (or studies). In order to use OncoKB annotations to filter mutations and discrete copy number alteration in Study View and Group Comparison, OncoKB annotations can be added to the respective data files of a study prior to import into the database. This page describes how to import OncoKB annotations as custom driver annotations. 
It assumes the following requirements have been satisfied:","The cBioPortal software has been correctly built from source.","The user is able to successfully import a study into the database.","The study subjected to OncoKB import is confirmed to be valid"]},{"l":"Import of OncoKB annotations when loading a study","p":["OncoKB annotations can be added automatically to the study files when the study is loaded into the database by adding the--import_oncokb parameter to the metaImport.py script like so:","This will add OncoKB data to the mutation and discrete CNA files of a study, revalidate the results and load the study into the database.","The addition of mutation and discrete CNA files is explained in detail below."]},{"l":"Update of MAF file with OncoKB annotations","p":["OncoKB annotations can be added to the MAF file by running importOncokbMutation.py like so:","Where -s is the path to the directory of the MAF file and -u is the URL to a cBioPortal instance (needed for resolution of gene identifiers).","importOncokbMutation.py will add OncoKB annotations as custom driver annotation columns in the MAF file. The unmodified MAF file will be stored in the study directory with the ONCOKB_IMPORT_BACKUP_ prefix."]},{"l":"Update of Discrete Copy Number file with OncoKB annotations","p":["OncoKB annotations can be added to the Discrete Copy Number data by running importOncokbDiscreteCNA.py like so:","Where -s is the path to the directory of the Discrete Copy Number data file and -u is the URL to a cBioPortal instance (needed for resolution of gene identifiers).","importOncokbDiscreteCNA.py will create a custom driver annotation file with name data_cna_pd_annotation.txt in the study directory. It will add a pd_annotations_filename field in the CNA meta file that references the newly created custom driver annotation file. 
The unmodified CNA meta file will be stored in the study directory with the ONCOKB_IMPORT_BACKUP_ prefix."]}],[{"l":"Import Gene Sets in cBioPortal","p":["Gene sets are collections of genes that are grouped together based on higher level function or system characteristics, such as being part of the same molecular process or found to be co-regulated for example. Assessing gene sets in cBioPortal is useful when the user wants to visualize the number of mutations in sets of genes, or wants to see if all genes in a set are up- or down-regulated. To visualize gene set variation in a sample, the user can calculate scores per gene set per sample using the Gene Set Variation Analysis (GSVA) algorithm ( Hänzelmann, 2013).","Before loading a study with gene set data, gene set definitions have to be added to the database. These can be custom user-defined sets, or sets downloaded from external sources such as MSigDB. Additionally, a gene set hierarchy can be imported which is used on the cBioPortal Query page for selecting gene sets."]},{"l":"Quick example","p":["This example shows how the process of importing gene set data using test data.","Navigate to scripts folder:","Import gene sets and supplementary data: Note: This removes existing gene set, gene set hierarchy and gene set genetic profile data.","Import gene set hierarchy data:","Restart Tomcat if you have it running or call the /api/cache endpoint with a DELETE http-request(see here for more information).","Import study (replace argument after -u with local cBioPortal and -html with preferred location for html report):"]},{"l":"Requirements for gene sets in cBioPortal","p":["Gene set functionality was added in cBioPortal 1.7.0. Please use this or a later version. In addition, the database has to be updated to version 2.3.0 or higher, depending on the cBioPortal version. 
This can be done by running the python wrapper migrate_db.py for migration.sql.","Updating the database is described here."]},{"l":"Import Gene Sets"},{"l":"File formats","p":["Once you have initialized MySQL with cBioPortal database, it is possible to import gene sets. The format of the gene set data file is the Gene Matrix Transposed file format (.gmt). This format is also used by the MSigDB, which hosts several collections of gene sets on: https://software.broadinstitute.org/gsea/msigdb/","Sample of .gmt file:","GMT files contain a row for every gene set. The first column contains the EXTERNAL_ID or stable id(MsigDB calls this \"standard name\"), e.g. GO_POTASSIUM_ION_TRANSPORT, not longer than 100 characters. The second column contains the REF_LINK. This is an optional URL linking to external information about this gene set. Column 3 to N contain the Entrez gene IDs that belong to this gene set.","Additional information can be placed in a supplementary file. This file should be a .txt, containing columns for the stable id, the long name (max 100 characters) and description of the gene set (max 300 characters).","Sample of supplementary .txt file:"]},{"l":"Run the gene set importer","p":["The importer for gene sets can be run with a perl wrapper, which is located at the following location and requires the following arguments:","The --new-version argument with a Version parameter is used for loading new gene set definitions. It is not possible to add new gene sets or change the genes of current gene sets, without removing the old gene sets first. This is to prevent the user from having gene sets from different definitions and data from older definitions. The user can choose the name or number of the Version as he likes, e.g. msigdb_6.1 or Oncogenic_2017. Running the script with --new-version removes all previous gene sets, gene set hierarchy and gene set genetic profiles. A prompt is given to make sure the user wants to do this. 
Note that it is possible to enter the same version as the previous version, but previous data is removed nevertheless.","The --update info can be used to update only the long name, description and reference URL."]},{"l":"Import Gene Set hierarchy","p":["After importing gene sets, you can import a gene set hierarchy that is used on the query page to select gene sets."]},{"l":"File format","p":["For gene set hierarchy files, we use the YAML format. This is a common format to structure hierarchical data.","Sample of format (note this is mock data):","To make your own hierarchy, make sure every branchname ends with :. Every branch can contain new branches (which can be considered subcategories) or gene sets (which are designated by the Gene sets: statement). The gene set names are the stable ids imported by ImportGenesetData.java and should start with -."]},{"l":"Running the gene set hierarchy importer"},{"l":"Import a study with gene set data","p":["Gene set data can be added to a study folder, after which the whole study can be imported with metaImport.py. cBioPortal supports GSVA Scores and p-values (from bootstrapping) calculated using Gene Set Variation Analysis (GSVA, Hänzelmann, 2013). A description of GSVA study data can be found in the cBioPortal File Formats documentation."]},{"l":"References","p":["GSVA: gene set variation analysis for microarray and RNA-Seq data Sonja Hänzelmann, Robert Castelo and Justin Guinney, BMC Bioinformatics, 2013 https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-7 https://www.bioconductor.org/packages/release/bioc/html/GSVA.html","Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles Aravind Subramanian, Pablo Tamayo, Vamsi K. Mootha, Sayan Mukherjee, Benjamin L. Ebert, Michael A. Gillette, Amanda Paulovich, Scott L. Pomeroy, Todd R. Golub, Eric S. Lander, and Jill P. 
Mesirov, PNAS, 2005 https://www.pnas.org/content/102/43/15545 https://software.broadinstitute.org/gsea/msigdb"]}],[{"l":"Import Gene Panels","p":["This page describes how to import a gene panel into the cBioPortal database. It assumes the following requirements have been satisfied:","The cBioPortal software has been correctly built from source.","The gene panel to import is in the proper file format. See Gene Panel File format for more information.","The PORTAL_HOME environment variable has been properly defined. See Loading a Sample Study for more information."]},{"l":"Gene panel file format","p":["The gene panel file follows the format of a meta file with the following fields:","stable_id: The name of the gene panel. This should be unique across all studies, as gene panels can be globally applied to any sample and any genetic profile.","description: A description of the gene panel.","gene_list: Tab separated genes, represented either by all gene symbols or all Entrez gene IDs.","An example gene panel file would be:"]},{"l":"Import command","p":["In this example, we are loading the example gene panels which resides in the sample dataset study_es_0.","After loading gene panels into the database, please restart Tomcat or call the /api/cache endpoint with a DELETE http-request(see here for more information) so that the validator can retrieve gene panel information from the cBioPortal API."]},{"l":"Update existing gene panel","p":["If a gene panel exists in the database with the same name as the one being imported, and there exists cancer study data that refers to this gene panel, the ImportGenePanel command will abort. In order to reimport the gene panel in this situation, run the UpdateGenePanel command.","If the incoming gene panel is the same as the original gene panel, whether through importing or updating, then no changes shall be made to the gene panel. If the incoming gene panel is empty, then the script will abort. 
Genes in the incoming gene panel that were not in the original shall be added to the existing gene panel. Conversely, genes not in the incoming gene panel that were in the original shall be removed from the existing gene panel. The UpdateGenePanel command will prompt twice to confirm changes made to the gene panel, such as genes to be added or removed."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["Study view page is fully responsive, it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. Higher priority will promote chart closer to the left-top.","In order to improve the layout, we added a layout algorithm layer. The study view page is using grid layout. All charts will be put into 2-dimensional systems. For example, pie chart, by default, takes 1 block and bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in(authenticated) users, charts layout is saved to users profile i.e, whenever user tries to re-visits the same url, previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to few charts, but as long as you assign a priority in the database except than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. 
Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. The higher the final (numeric) score, the higher priority assigned.","This is combination of DFS_MONTH and DFS_STATUS","This is combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1.(Currently disables charts for single clinical attributes only)","To promote certain chart in study view, please increase priority in the database to a certain number. The higher the score, the higher priority it will be displayed in the study view. If you want to hide chart, please set the priority to 0. For combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Updating your cBioPortal installation","p":["As of release 1.1.0 cBioPortal has a Database schema update mechanism which comes into play whenever the new version of the portal code relies on specific DB schema changes to be applied. The portal will automatically check if the DB schema is according to what it expects. It does so by comparing the version number of the portal code with the version number of the DB schema. If they are equal, it assumes the DB schema has been upgraded. If not, it will require the administrator to run a migration script. Below are the steps to ensure your DB schema is updated correctly."]},{"l":"First time","p":["The first time you update from release 1.0.4(or lower) to release 1.1.0(or higher), you should get a an error banner page after restarting your webserver. The error should state something like:","where xxx and yyy will be different version numbers.","If you get DB version expected by Portal: 0(i.e. you are building the new release from source), you need to add a new property to your portal.properties file which is needed for this check."]},{"l":"Step1","p":["In your portal.properties file (e.g. 
your_cbioportal_dir/src/main/resources/portal.properties) add the following property:"]},{"l":"Step2","p":["Compile your code again. After restarting the webserver the page should now state something like: DB version expected by Portal: 1.1.0(or higher), while the DB version remains as Current DB version: -1."]},{"l":"Running the migration script","p":["First, make sure you have the DB connection properties correctly set in your portal.properties file (see DB connection settings here).","Dependencies: the migration script is a Python script that depends on the mysqlclient library. If necessary, you can install it with the following commands (example for Ubuntu):","For macOS, try the following:","and see https://github.com/PyMySQL/mysqlclient-python/blob/master/README.md#prerequisites if problems occur during installation.","To run the migration script first go to the scripts folder your_cbioportal_dir/core/src/main/scripts and then run the following command:","This should result in the following output:","Final step: Restart your webserver or call the /api/cache endpoint with a DELETE http-request(see here for more information)."]}],[{"l":"Updating the gene names and aliases tables","p":["This manual is intended for users that have knowledge about the structure of the cBioPortal seed database.","When loading studies into cBioPortal it is possible for warnings to occur that are caused by an outdated seed database. Gene symbols can be deprecated or be assigned to a different Entrez Gene in a new release. Also Entrez Gene IDs can be added. 
This markdown explains how to update the seed database, in order to use the most recent Entrez Gene IDs.","The cBioPortal scripts package provides a method to update the gene and gene_alias tables."]},{"l":"Prepare"},{"l":"Human genes","p":["Download gene_info.txt Generated based on latest HGNC release using script HERE"]},{"l":"Mouse genes","p":["Download Mus_musculus.gene_info.gz from ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz Unzip the downloaded file with the command gunzip Mus_musculus.gene_info.gz"]},{"l":"MySQL steps","p":["Execute these steps in case you want to reset your database to the most recent genes list from NCBI.","1- Start a new MySQL database with the previous seed database, which can be found on cBioPortal Datahub for human and mouse.","2- If DB engine supports foreign key (FK) constraints, e.g. InnoDB, drop constraints:","3- Empty tables gene and gene_alias","4- Restart cBioPortal (restart webserver) or call the /api/cache endpoint with a DELETE http-request(see here for more information) to clean-up any cached gene lists.","5- To import gene data type the following commands when in the folder cbioportal_source_folder/core/src/main/scripts:"]},{"i":"human-genes-1","l":"Human genes"},{"i":"mouse-genes-1","l":"Mouse genes","p":["IMPORTANT NOTE:","The reference_genome table needs to be populated before updating the gene table. 
Further details can be found in this document.","Use --species option when importing genes for a species other than human","Use the gene table if you query information such as hugo symbols, types of the gene","Use reference_genome_gene table if you query information such as chromosome, cytoband, exonic length, or the start or end of the gene","Load genes only to the reference_genome_gene table without updating the gene table, please use the following command:"]},{"i":"human-genes-2","l":"Human genes"},{"i":"mouse-genes-2","l":"Mouse genes","p":["6- ⚠️ Check the gene and gene_alias tables to verify that they are filled correctly.","7- Additionally, there are other tables you may want to update now (only in human).","Updating the COSMIC coding mutations, can be downloaded from here and require the script importCosmicData.pl","8- Clean-up old data:","9- If DB engine supports FK constraints, e.g. InnoDB, restore constraints:","10- You can import new gene sets using the gene set importer. These gene sets are currently only used for gene set scoring. See Import-Gene-Sets.md and File-Formats.md#gene-set-data.","For example, run in folder cbioportal_source_folder/core/src/main/scripts:","Please make sure the version gene sets is the same as the version used to calculate gene set scores in your data."]}],[{"l":"Migration Guide","p":["This page describes various changes deployers will need to make as they deploy newer versions of the portal. -"]},{"i":"v53---v54","l":"v5.3 -> v5.4","p":["Remove db.host and db.portal_db_name and db.use_ssl properties from the portal.properties file or JVM parameters. Update property db.connection_string to encode the hostname, port, database and other parameters according to Database Settings documentation and pass via portal.properties file or as JVM parameter."]},{"i":"v4---v5","l":"v4 -> v5","p":["All fusion profiles are now required to be migrated to structural variant format. 
One can use this migration tool to migrate the fusion files.","All fusion files on datahub were migrated to the structural variant format and their molecular profile ids were renamed from {study_id}_fusion to {study_id}_structural_variants. If you are using these datahub files one would need to re-import them.","Study view user setting will be outdated after migration, please follow Clear Study View User settings section in Session Service Management","The new default set of transcripts for each gene has changed from uniprot to mskcc. See the Mutation Data Annotation Section for more details. To keep the old set of default transcripts add the property genomenexus.isoform_override_source=uniprot to the properties file.","See the v5.0.0 release notes for more details."]},{"i":"v3---v4","l":"v3 -> v4","p":["Introduces logback package for logging. If you don't have any custom log4j.properties file, no changes are necessary","Cleans up several old databases ( PR). In theory the migration should be seamless, since the docker container detects an old database version and migrates it automatically.","See the v4.0.0 release notes for more details."]},{"i":"v2---v3","l":"v2 -> v3","p":["Session service is now required to be set up. You can't run it without session service. The recommended way to run cBioPortal is to use the Docker Compose instructions."]},{"i":"v1---v2","l":"v1 -> v2","p":["Changes cBioPortal to a Single Page App (SPA) written in React, Mobx and bootstrap that uses a REST API. 
It shouldn't change anything for a deployer."]}],[{"i":"msk-maintenance-in-progress","l":"MSK Maintenance (In Progress)","p":["We provide our cBioPortal's maintenance page publicly in the hope that it may be useful to others"]},{"l":"Database Migration","p":["In the database migration process, we are going to have two main steps: building importers and updating database scheme."]},{"l":"Building Importers","p":["(Optional) Remove existing jars","[--build|-b=], build_specifier should be one of the following","[--cbioportal-git-hash|-cgh=]","[--skip-deployment|-sd=]","[--skip-git-pull|-sgp=]","all (build for all artifacts)","Available parameters:","Build all importers at once (build all importers except cmo-pipelines)","Build importer","Build multiple importers","Building single importer:","cbioportal","cd /data/portal-con/git-repos/pipelines","cd /data/portal-cron/git-repo/pipelines-configuration/build-importer-jars","cmo-pipelines (cmo-pipelines artifacts only)","Codebases:","Copy importer properties to /data/portal-con-git-repos/cbioportal/src/main/resources(e.g. 
triage /data/portal-cron/git-repos/pipelines-configuration/properties/import-triage/*)","genie-archive-importer","genie-aws-importer","genome nexus annotation pipeline","Go to build importer jars folder","hgnc-importer","importers (all importers except cmo-pipelines)","Login to pipelines server","Make sure the local cbioportal codebase is on the correct git hash at /data/portal-cron/git-repos/cbioportal","msk-cmo-importer","msk-dmp-importer","mvn clean install -DskipTests","Navigate to pipelines folder","pipelines","public-importer","rm *.jar","Run export PORTAL_HOME=/data/portal-cron/git-repos/cbioportal","Run sh /data/portal-con/scripts/automation-environment.sh","Sets general env variables needed for building","sh buildproductionjars.sh -sd=true -sgp=true -b=importers","Specific to cbioportal build, looks for properties and xml files off of $PORTAL_HOME","Specify importer to be build","Take the cbioportal hash and add it to genome nexus annotation pipeline POM -> push to fork","Take the cbioportal hash and new genome nexus annotation pipeline hash (from above) and propagate to pipelines POM; also update db version if db migration is part of change","triage-cmo-importer"]},{"l":"Updating Database Scheme","p":["Database needs to be updated one by one, we have four main databases: triage, private, genie, and public. Take triage database as an example.","Migrate one database (e.g. 
triage)","SSH into pipeline server","Checkout to the commit that contains the latest database scheme","Check if property sets up correctly to the right database (triage)","vi /data/portal-con-git-repos/cbioportal/src/main/resources/portal.properties","Move to directory","cd /data/portal-con-git-repos/cbioportal","Run database migration using script:","python3 core/src/main/scripts/migrate_db.py --properties-file src/main/resources/portal.properties --sql db-scripts/src/main/resources/migration.sql","Monitor the DB migration process and look for possible errors","Access database and verify the DB scheme is updated"]}],[{"l":"Development","p":["cBioPortal is an open source project hosted on Github. The core team will consider pull requests from any source.","The following section describes how to set up a development environment and start contributing.","Note that cBioPortal is split into a backend project (api) and a frontend project which are kept in separate repositories.","Frontend(Typescript, React, Mobx)","Backend(Java, Spring, MySQL, MongoDB)","If you are interested in coordinating the development of new features, please contact cbioportal@cbioportal.org or reach out on https://slack.cbioportal.org."]}],[{"l":"Feature Development Guide","p":["This is a guide for developers that are implementing a new feature."]},{"l":"Before Implementation","p":["As a first step it is important to determine how complex the proposed feature is. Incremental improvements on existing features are often easier to accomplish and require input from fewer people. Most minor changes can be submitted as a Pull Request. If the proposed feature would require one or more days of work it makes sense to connect on slack to discuss the idea. 
For more complex new features that require weeks of work or more, it is best to get input from several people in the cBioPortal community, including people with a deep understanding of the cBioPortal product and its users as well as the engineers that write the software. In that case we often start out with a Request For Comments document that describes the feature in more detail, see our list of RFCs for some examples. The community can then help guide the feature development in the right direction.","During this process you will most likely receive some pointers which part of the stack you will be editing (see Architecture Overview). This will be helpful when actually starting your implementation and figuring out how to set up your development environment. For many features it is not necessary to understand all parts of the stack, so seeking out advice on this is highly recommended.","Before you start implementing a more complex feature, ideally many of these things are clear:","Who can you contact for help?","Who will be helping to review the code?","What part of the stack will you work on?","Gold stars if you already start thinking about:","How do we release the feature incrementally?","When is feature development done?","See more thoughts about these topics further below"]},{"l":"Starting Implementation","p":["Once you are ready to start implementing, the first thing is to set up the development environment. We strive to make this as easy as possible, but it can often still take some time so definitely reach out if you run into issues. If you haven't submitted a Pull Request to cBioPortal before, it might make sense to look at some good first issues before starting with your feature. This will help to get some familiarity with the process of proposing a change, getting it reviewed, making edits and getting it to production. 
Don't pick anything too complicated for a first issue, it could be as simple as fixing some typos in the README."]},{"l":"During Implementation","p":["The most important part during implementation is communication. Continue getting feedback as your implementation evolves. One of the best ways to do this is to fully integrate into the development team while you work on a feature. Anybody is welcome to join our weekly planning meeting (Tuesdays 11AM-12PM Eastern Time) and our daily standups 2.30-3PM Eastern Time. Please reach out on slack to get an invite. If these times are not ideal or you're working on the feature more sporadically then it's totally fine to skip them."]},{"l":"Plan to release to production early","p":["Don't wait until the feature is fully finished to get feedback from the product team and the engineering team. Think about ways we can release a portion of the feature to production without finishing the entire thing. We have found feature flags to work well here. Instead of using long running feature branches we try to add a configuration property that allows us to turn the feature on or off. That way portions of the code can be released to production early on. We want to avoid working on some piece of code for more than a week or so without being able to release it. For instance: if one is trying to add some new tab on the Patient View Page, one could start with adding the on/off configuration switch for this tab. That could be released to production relatively quickly."]},{"l":"Regression Testing","p":["Make sure to think about ways to incorporate testing for your feature. We have an extensive suite of unit, integration and end to end tests including automated browser testing that mimics user interaction. 
Adding some regression tests will make sure the feature won't break with new versions of cBioPortal."]},{"i":"when-is-feature-development-done","l":"When is feature development done?","p":["There are many stages in feature development:","Design","Implementation","Review","Production Deployment","Production Usage Monitoring","The process is hardly ever a linear line and it can move back and forth between any stage. This is expected and one of the reasons why time estimations are notoriously hard. An additional note is that feature development usually isn't done at the moment it gets merged to the main branch and deployed, but rather only after a few weeks of using it in production and not identifying any new issues."]}],[{"l":"cBioPortal ER Diagram","p":["cBioPortal ER Diagram - PDF Version"]}],[{"l":"cBioPortal Database Versioning","p":["Add a constraint","Add a new column to the table","Create a new table","Delete a table","Delete data from a table:","Drop a constraint","Drop column c from the table","Insert multiple rows into a table","MAJOR: A non-backward compatible significant change in the database. Which requires the maintainer to reload and re-import all studies in the database entirely.","MINOR: Including deleting, renaming tables or columns, or changing constraints","PATCH: Changes that don't change existing database schemes but add new tables or columns, manipulating data.","Remove all data in a table","Rename a table from t1 to t2","Rename column c1 to c2","Update data in a table:","We follow the following logic when deciding how/when to increment the version of cBioPortal database. 
It's a complete modification of semantic versioning (MAJOR.MINOR.PATCH) more suitable for our purposes:"]}],[{"l":"Build with different frontend versions","p":["Maven will build cBioPortal with a cBioPortal-frontend version and git repository url as determined by respectively the frontend.version and frontend.groupId parameters in the root POM.xml.","To build cBioPortal with a different frontend version different values for frontend.version and frontend.groupId parameters can be specified as part of the maven install command. For example:","Remarks:","The frontend.version parameter allows release tags (e.g. 'v2.1.0') and commit sha-hashes (e.g., '93d9cbcb').","The frontend.groupId is a reversed, dot-separated derivative of the git url. Git repository location github.com/cbioportal is represented by the com.github.cbioportal groupId."]}],[{"l":"Session Service"},{"l":"1. Steps to add new Session type through session-service","p":["Define new type at SessionType.java.","Add necessary tests if required.","Update session-service documentation."]},{"l":"2. cBioPortal Backend"},{"i":"21-configuring-session-service","l":"2.1 configuring session-service","p":["Here are the properties that needs to be set"]},{"i":"sessionserviceurl-format","l":"session.service.url format"},{"l":"example"},{"i":"22-updates-to-code","l":"2.2 Updates to code","p":["Update session-service dependency version in pom.xml.","Add/Update api's in SessionServiceController.java to support new session type.","Sometimes we might need to defined model for session. Check how session class is extended in VirtualStudy and how it is consumed in SessionServiceController.java."]},{"l":"3. cBioPortal Frontend","p":["Client code for cbio session service is in https://github.com/cBioPortal/cbioportal-frontend/blob/master/src/shared/api/sessionServiceAPI.ts. Update it accordingly when the new api added in cBioPortal backend SessionServiceController.java"]},{"l":"4. 
Local development"},{"i":"41-expose-database-port","l":"4.1 Expose database port","p":["If docker-compose.yml is used for running session service and if you like to access mongodb directly then uncomment this block of code"]},{"i":"42-test-results-with-curl-in-terminal","l":"4.2 Test results with cURL in terminal","p":["After you have local cBioPortal instance running, you can use cURL to test session-service endpoints.","Example for GET methods:","Example for POST methods:","Note on Cookie: You can log in to your account in your local cBioPortal instance, after login, copy cookies from requests in network tab. See login configuration for more information."]}],[{"l":"Manual test cases","p":["Linked below is a Google Doc with some manual test cases you might want to look at when developing, and deploying new changes. Note that this list is a work in progress and subject to change.","https://docs.google.com/document/d/1IniMNrrSEGsPRtkaU3Vbcm6sMgcOhytBurIRg0SqXgA/edit?usp=sharing"]}],[{"l":"Release Procedure","p":["We have release procedures for the following scenarios:","cBioPortal community release of code already in production","Release with database migration"]},{"l":"cBioPortal community release of code already in production","p":["We often run code in production that is not ready yet for use by the wider cBioPortal community. We deploy to production what's in the master branch of the backend repo and the frontend repo. Often times this is not a tagged release. At some point this code should be released for the wider community. These are the steps we follow:","Create a new frontend tag. The releases can be found here: https://github.com/cBioPortal/cbioportal-frontend/releases. A draft of the release notes are automatically generated by https://github.com/marketplace/actions/release-drafter. If there are pull requests in the Changes section i.e. they have not been labeled with one of the labels defined here. 
Try to label them and trigger a rerun by committing something to the master branch. Alternatively you can manually put them in a particular section. Note that our goal is to have automated release notes, so it would be great if you could send a PR to update the release-drafter.yml in case you find certain PRs don't fit in a particular section or a section should be altered. Look at other release notes for inspiration: https://github.com/cBioPortal/cbioportal-frontend/releases. You can save your work as a draft if necessary.","Once the frontend code is tagged, create a pull request to the backend repo where the frontend version is incremented in portal/pom.xml:","Once that PR is merged, one can create a tag for the backend repo with the same tag as the frontend repo. You should see a draft from release drafter similar to the frontend in the backend repo: https://github.com/cBioPortal/cbioportal/releases. The idea is to create one set of release notes in the backend repo that is a combination of the frontend and backend notes. To make the hyperlinks from the frontend repo work in the backend repo you can copy the frontend release notes raw markdown and run the following one liner to convert the links:","then put them in the right sections following same style as other releases: https://github.com/cBioPortal/cbioportal-frontend/releases.","Create a news item with a link to your carefully crafted release notes. Highlight a few major changes that could be interesting to users of cBioPortal ideally with a screenshot similar to: https://github.com/cBioPortal/cbioportal/pull/6914/files?short\\_path=6f95322#diff-6f953229832059bab3fe229d4af08b52(in the files changed section, you can click on view rich diff to see the converted markdown)."]},{"l":"Release with database migration","p":["For releases with database migrations, we increase the MINOR number in MAJOR.MINOR.PATCH. 
For those releases we have a separate branch (see https://github.com/cBioPortal/cbioportal/blob/master/CONTRIBUTING.md#branches-within-cbioportal), which needs to be merged to master on both backend and frontend:","Make sure no auto deployment is running for frontend from netlify","Merge frontend release-x.y.z branch to frontend master","Follow same procedure as for a PATCH release, but instead of having a separate PR to update the frontend (step 2) one can add it to the already existing backend branch release-x.y.z and open the PR from there to backend's master. This is merely for convenience to avoid having to create another branch just to update the frontend version."]},{"l":"A note on versioning"},{"l":"From pre-release to official release","p":["On the GitHub Release Page you will see that some releases have the pre-release indication whereas others do not. In general we make a new pre-release release every week. We test it out in production ( https://cbioportal.org) for one month and if no new critical issues are identified we make it an official release. Occasionally we make a new official release in less than a month's time if we identify a critical issue in the previous release."]},{"l":"cBioPortal Software Version Numbers","p":["We follow the following logic when deciding how/when to increment the version of cBioPortal. It's a complete modification of semantic versioning (MAJOR.MINOR.PATCH) more suitable for our purposes:","MAJOR : A big change in how cBioPortal works. We changed the major version from 1 to 2 when we completely moved from using JSPs to a Single Page App written in React calling a REST service. Another example: we changed from 2 to 3 when we made session-service a requirement.","MINOR : Changes that require a database migration, an upgrade to another cBioPortal component (e.g. session-service) or anything that could require additional effort for a deployer of cBioPortal (e.g. 
see transcript change).","PATCH : Changes that don't require database migrations. Could be new features as well as bug fixes to either frontend, backend or both.","See also: https://github.com/cBioPortal/cbioportal/releases"]},{"l":"cBioPortal Database Version Numbers","p":["cBioPortal database version numbers are different version numbers than the software version numbers, it's only updated when developers make database scheme changes, please see cBioPortal Database Versioning"]},{"l":"User announcements","p":["The following is a provisional system of alerting user to new features and announcements. It would probably be better for these messages to be configurable on an instance level by installers. For now, the following suffices.","Top banner: see sample configuration in src/shared/components/userMessager/UserMessage.tsx","For a beacon and associated dialog message, use this as a model, where child of InfoBeacon component is any component that will be shown when InfoBeacon is moused over and conditions are met"]}],[{"l":"Deployment Procedure","p":["This describes our internal deployment procedure. Shared publicly, in case it may be of use. Instructions on how to deploy cBioPortal can be found elsewhere, see e.g. Deploying the web application and Deploy using Docker.","We deploy the master branch of backend and the master branch of frontend to production. The public portal ( https://www.cbioportal.org) runs on AWS inside kubernetes. The configuration can be found in the knowledgesystems repo:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment","Other portals run at MSKCC on two internal machines called dashi and dashi2. Since we're running several apps in several tomcats internally the procedure for updating them is different from the public portal on AWS. The configuration is in the mercurial portal-configuration repo. To make changes, ask Ben for access.","The frontend and backend can be upgraded independently. 
We have the following events that can require a new deployment:","New frontend commit in master","New backend commit in master"]},{"l":"New frontend commit in master","p":["Currently we are auto-deploying the frontend master branch to netlify: https://frontend.cbioportal.org. So any change should be automatically built and deployed to the relevant portals if the frontend configuration has been set up properly. Do note that the current build time for the frontend project is ~ 15 minutes or so. To see what frontend commit is deployed, check window.FRONTEND_COMMIT in the console of the browser."]},{"l":"Public Portal Frontend URL","p":["The public portal is on AWS and running inside a Kubernetes cluster. The URL that it gets the frontend version from is here:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment/search?q=-Dfrontend.url&unscoped_q=-Dfrontend.url","This should be a URL pointing to netlify."]},{"l":"Internal Portal Frontend URL","p":["For the internally runnning portals the frontend.url is defined in the portal.properties file in the mercurial portal-configuration repo. If set up correctly, this should point to a file on both dashi and dashi2 that in turn points to a netlify frontend URL. The reason we have a separate file with the URL in it is that it allows us to update the frontend URL without redeploying the backend."]},{"l":"New backend commit in master","p":["A new backend commit usually also means a new frontend change is necessary. For this reason the following sections assume that's the case."]},{"l":"Public Portal Backend Upgrade","p":["Once the backend repo has been tagged on github, a docker image gets build on Docker Hub automatically. It can take ~ 5 min before the image is available. 
You can check here what the status of the builds is: https://github.com/cBioPortal/cbioportal/actions?query=workflow%3A%22Docker+Image+CI%22.","After that, if you have access to the kubernetes cluster you can change the image in the configuration of the kubernetes cluster:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment/blob/master/cbioportal/cbioportal_spring_boot.yaml","point this line, to the new tag on docker hub e.g.:","Make sure it is an image with the postfix -web-shenandoah. This is the image that only has the web part of cBioPortal and uses the shenandoah garbage collector.","Also remove the -Dfrontend.url parameter such that the frontend version inside the war will be used:","Then running this command applies the changes to the cluster:","You can keep track of what's happening by looking at the pods:","If you have the watch command installed you can also use that to see the output of this every 2s:","Another thing to look at is the events:","If there are any issues, point the image back to what it was, set-Dfrontend.url and run kubectl apply -f filename again.","If everything went ok, you can re-enable auto deployment on netlify, set-Dfrontend.url in the kubernetes file and run kubectl apply -f filename again.","Make sure to commit your changes to the knowledgesystems-k8s-deployment repo and push them to the main repo, so that other people making changes to the kubernetes config will be using the latest version."]},{"l":"Private Portal Backend Upgrade","p":["First update the frontend portal configuration to point to a new file. It's fine if this file does not exist yet, because if it doesn't the frontend bundled with the war will be used. We can later point the file to netlify, once we've determined everything looks ok.","You can use this for loop to update the frontend url in all properties files(set it to a file that doesn't exist yet and give it a sensible name e.g. 
frontend_url_version_x_y_z.txt):","Same for triage-tomcat (agin set the correct file name)::","Make sure you see the frontend url file updated correctly:","Then commit and push your changes to the mercurial repo:","If you have your public key added for the relevant deploy scripts you should be able to deploy with the following command on dashi-dev:","If you don't have a SSH key set up to run the deploy script ask Ino.","If everything looks ok you can update the frontend url file to point to netlify. Log in to dashi and become msk-tomcat with sudo su - msk-tomcat. Then change the update script:","to point oldurlfile=/srv/www/msk-tomcat/frontend_url_version_2_0_0.txt to the new frontend url file you supplied above.","Then update the url like:","Do the same thing on dashi2.","The last step is to modify the frontend url file for the triage portal. Log in to the pipelines machine, log in as triage-tomcat user: sudo su - triage-tomcat, and update the frontend url file there:"]},{"l":"Upgrading Related Backend Components","p":["Backend upgrades involving the database schema, DAO classes, etc. require updates to databases and importers. CBioPortal has multiple databases (located both internally on pipelines and in AWS) backing different portals. Similarly there are multiple importers responsible for loading portal-specific data. Every database must be manually migrated on an individual basis; all importers/data fetchers can be updated simultaenously through an existing deployment script.","Before upgrading, make sure to turn off import jobs in the crontab and alert the backend pipelines team (Avery, Angelica, Rob, Manda).","To access the crontab, log in to pipelines, log in as cbioportal_importer: sudo su - cbioportal_importer, and run crontab -e. Comment out any lines that run import jobs, save, and exit. Make sure to uncomment these lines once the upgrade (database and importers) is complete. 
Lines that need to be commented out will be under the Import Jobs section, shown here."]},{"l":"Updating Databases","p":["AWS","cbioportal.mskcc.org","cbioportal.org","cgds_gdac","cgds_genie","cgds_public","cgds_triage","Database","First, make sure there is a backup of the database being migrated. If there is not a weekly dump, backup the database being migrated using mysqldump. This process may take awhile depending on the size of the database.","genie.cbioportal.org","Location","pipelines","The second step is to migrate the database. Make sure that the migration script is the same version as the deployed cBioPortal website. It is recommended to first test the migration script manually line-by-line in a copy of the existing database. This will catch any data-related bugs that might not be captured by the python migration script. After testing is successful, migrate the production databases following these steps here.","These are all cBioPortal databases and their locations:","To obtain information such as usernames, passwords, hostnames - ask Avery, Angelica, Rob, Manda, and Ino.","triage.cbioportal.org","Website"]},{"i":"updating-importersdata-fetchers","l":"Updating Importers/Data Fetchers","p":["Importers (code found here) and data fetchers (code found here) use code from the cBioPortal codebase. The cbioportal dependency is packaged with the genome-nexus-annotation-pipeline and specified in the pipelines importer pom.","The following steps are used during releases/updates to build new importers with the most-up-to-date cBioPortal and genome-nexus-annotation-pipeline code. 
All steps should be performed on the pipelines machine.","Set the jitpack hash here in the genome-nexus-annotation-pipeline codebase to the most recent cbioportal/cbioportal commit hash in master.","Merge this change into genome-nexus-annotation-pipeline/master.","Set the commit hash here in the pipelines codebase to the most most recent genome-nexus/genome-nexus-annotation-pipeline commit hash (after merge specfied in step 2). Also ensure the db version in the pom here matches the db schema version in the cbioportal codebase.","Merge this change into pipelines/master.","Set the commit hash here in the cmo-pipelines codebase to the most recent genome-nexus/genome-nexus-annotation-pipeline commit hash (after merge specified in step 2)","Merge this change into cmo-pipelines/master","Run the deployment wrapper script. See details here.","Verify new importers/data fetchers have been placed in /data/portal-cron/lib by checking timestamps."]},{"l":"Deployment Script","p":["The wrapper script is found on pipelines here:/data/portal-cron/git-repos/pipelines-configuration/build-importer-jars/buildproductionjars.sh.","Run git pull to pull in any updates to the build script.","The wrapper script takes two arguments:","--cbioportal-git-hash (required): Set to the cBioPortal commit hash being used in the pipelines build (hash specified in step 1 of updating importers). This must match because the build copies out resource files (e.g application-context-business.xml) from the cbioportal codebase.","--skip-deployment (optional): Set to true to skip auto-deployment to /data/portal-cron/lib. 
Built jars will be found in /data/portal-cron/git-repos/pipelines-configuration/build-importer-jars/ and can be be manually moved.","The wrapper script will automatically backup the importers/data-fetchers to /data/portal-cron/lib/backup."]}],[{"l":"Documentation site","p":["This documentation site is created using https://retype.com/, a static site generator based on markdown.","Visit their site for installation instructions and a guide on how","Develop, build and test the site locally","Markdown syntax","Deploy to Github pages using Github actions."]},{"l":"Navigation","p":["Please note that the navigation for the site is defined in docs/SUMMARY.md. This is a deprecated form of configuration which is not documented in Retype."]}]]
\ No newline at end of file
+[[{"i":"welcome-to-the-documentation-for-cbioportal","l":"Welcome to the documentation for cBioPortal!","p":["We've organized this site by the different roles in the cBioPortal community.","cBioPortal Google Group","We maintain an active list of RFCs (Requests for Comments) where we describe new features and solicit community feedback.","FAQ","Tutorials","API documentation"]}],[{"l":"About Us","p":["The cBioPortal for Cancer Genomics was originally developed at Memorial Sloan Kettering Cancer Center(MSK). The public cBioPortal site is hosted by the Center for Molecular Oncology at MSK. The cBioPortal software is now available under an open source license via GitHub. The software is now developed and maintained by a multi-institutional team, consisting of MSK, the Dana Farber Cancer Institute, Princess Margaret Cancer Centre in Toronto, Children's Hospital of Philadelphia, Caris Life Sciences, The Hyve and SE4BIO in the Netherlands, and Bilkent University in Ankara, Turkey."]},{"l":"Memorial Sloan Kettering Cancer Center","p":["Aaron Lisman","Angelica Ochoa","Anusha Satravada","Avery Wang","Benjamin Gross","Bryan Lai","Calla Chennault","Gaofei Zhao","Hongxin Zhang","Ino de Bruijn","Manda Wilson","Nikolaus Schultz","Ramyasree Madupuri","Rima AlHamad","Ritika Kundra","Robert Sheridan","S Onur Sumer","Xiang Li"]},{"l":"Dana-Farber Cancer Institute","p":["Ethan Cerami","Tali Mazor","Jeremy Easton-Marks","Zhaoyuan (Ryan) Fu","Augustin Luna","James Lindsay","Chris Sander"]},{"i":"princess-margaret-cancer-centre-toronto","l":"Princess Margaret Cancer Centre, Toronto","p":["Prasanna Jagannathan","Trevor Pugh"]},{"i":"childrens-hospital-of-philadelphia","l":"Children's Hospital of Philadelphia","p":["Charles Haynes","David Higgins","Allison Heath","John Maris","Adam Resnick","Miguel Brown"]},{"l":"Caris Life Sciences","p":["Jianjiong Gao","Priti Kumari","Karthik Kalletla"]},{"l":"The Hyve","p":["Oleguer Plantalech","Pim van Nierop","Sander Rodenburg","Bas Leenknegt","Elena 
G Lara","Jessica Singh","Matthijs Pon","Tim Kuijpers","Mirella Kalafati","Sjoerd van Hagen"]},{"l":"SE4BIO","p":["Pieter Lukasse","Ruslan Forostianov"]},{"l":"Bilkent University","p":["Ugur Dogrusoz","Yusuf Ziya Ozgul"]},{"l":"Alumni","p":["Adam Abeshouse","Alexandros Sigaras","Anders Jacobsen","Andy Dufilie","Arthur Goldberg","B Arman Aksoy","Caitlin Byrne","Catherine Del Vecchio Fitz","Diana Baiceanu","Dionne Zaal","Divya Madala","Dong Li","Erik Larsson","Ersin Ciftci","Fedde Schaeffer","Fred Criscuolo","Gideon Dresdner","Hsiao-Wei Chen","Irina Pulyakhina","Istemi Bahceci","James Xu","Jiaojiao Wang","Jing Su","Kaan Sancak","Kees van Bochove","Kelsey Zhu","Leonard Dervishi","Luke Sikina","M Furkan Sahin","M Salih Altun","Michael Heuer","Ngoc Nguyen","Olivier Elemento","Paul van Dijk","Peter Kok","Pichai Raman","Riza Nugraha","Sander Tan","Stuart Watt","Tamba Monrose","Yichao Sun","Zachary Heins","Ziya Erkoc"]},{"i":"funding-for-the-cbioportal-for-cancer-genomics-is-or-has-been-provided-by","l":"Funding for the cBioPortal for Cancer Genomics is or has been provided by:"},{"i":"current","l":"Current:","p":["NCI, through ITCR grant NCI-U24CA274633 and HTAN grant NCI-U24CA233243","Marie-José and Henry R. 
Kravis Center for Molecular Oncology at MSK","Dana Farber Cancer Institute","American Association for Cancer Research through AACR Project GENIE","Prostate Cancer Foundation","The Cholangiocarcinoma Foundation","Robertson Foundation"]},{"i":"past","l":"Past:","p":["NCI, through ITCR grant NCI-U24CA220457","Stand Up 2 Cancer","The Ben & Catherine Ivy Foundation","NCI, as a TCGA Genome Data Analysis Center (GDAC)(NCI-U24CA143840)","NCRR, as the National Resource for Network Biology (NRNB) Research Resource (RR 031228-02)","Starr Cancer Consortium","Breast Cancer Research Foundation","Adenoid Cystic Carcinoma Research Foundation","POETIC Consortium","Parker Institute for Cancer Immunotherapy"]}],[{"l":"List of Active RFCs","p":["We maintain an active set of RFCs (Requests for Comments) where we spec out new features and solicit community feedback.","See this shared google folder for the list of RFCs."]},{"l":"For Developers Creating new RFCs","p":["Use the RFC Template","Create your RFC within this shared google folder, and pick a new incremental number.","Add a Link to your RFC on this page."]}],[{"l":"User Guide","p":["The cBioPortal for Cancer Genomics is a resource for interactive exploration of multidimensional cancer genomics data sets. The goal of cBioPortal is to significantly lower the barriers between complex genomic data and cancer researchers by providing rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects, and therefore to empower researchers to translate these rich data sets into biologic insights and clinical applications.","The following section guides users in performing queries and analysis on any instance of cBioportal."]}],[{"l":"New Users","p":["Are you new to cBioPortal? Welcome! 
We have a few options to help you get started.","If you have an hour, we highly recommend watching the recording of our Introduction to cBioPortal webinar ( youtube.com or bilibili.com).","Don't have an hour? Review our tutorial slides for exploring a study ( Google slides or PDF) and running a query ( Google slides or PDF)","Or, watch two of our short how-to videos which demonstrate how to explore a study ( youtube.com) and how to run a query ( youtube.com)."]}],[{"l":"cBioPortal FAQs","p":["Analysis Questions","Are there any normal tissue samples available through cBioPortal?","Can I change the order of genes in the OncoPrint?","Can I create a local instance of cBioPortal to host my own data?","Can I download all data at once?","Can I save or bookmark my results in cBioPortal?","Can I use cBioPortal with my own data?","Can I use figures from the cBioPortal in my publications or presentations?","Can I visualize my own data within an OncoPrint?","Clinical Data","Data Questions","DNA (Mutations, Copy Number & Fusions)","DNA Methylation","Does the cBioPortal contain synonymous mutation data?","Does the cBioPortal provide a Web Service API? R interface? 
MATLAB interface?","Does the Mutual Exclusivity tab calculate its statistics using all samples/alterations or only a specific subset?","Does the portal contain cancer study X?","Does the portal store raw or probe-level data?","Does the portal work on all browsers and operating systems?","General Data","General Questions","How are protein domains in the mutational lollipop diagrams specified?","How can I compare outcomes in patients with high vs low expression of a gene?","How can I compare two or more subsets of samples?","How can I create a subset or sub cohort of a study with specific samples or patients?","How can I download the PanCancer Atlas data?","How can I find which studies have mRNA expression data (or any other specific data type)?","How can I form a combined Study?","How can I query microRNAs in the portal?","How can I query over/under expression of a gene?","How can I query phosphoprotein levels in the portal?","How can I query/explore a select subset of samples?","How do I access data from AACR Project GENIE?","How do I cite the cBioPortal?","How do I get started?","How do I get updates on new portal developments and new data sets?","How does cBioPortal handle duplicate samples or sample IDs across different studies?","How does TCGA data in cBioPortal compare to TCGA data in Genome Data Commons?","How is TCGA RNASeqV2 processed? What units are used?","How is the cBioPortal for Cancer Genomics different from the Genomic Data Commons (GDC)?","How to use filter in the URL of Study View page?","I'd like to contribute code to the cBioPortal. How do I get started?","Is it necessary to log in to use virtual studies? If I do log in, what additional functionality do I gain?","Is it possible to determine if a particular mutation is heterozygous or homozygous in a sample? 
When a sample has 2 mutations in one gene, is it possible to determine whether the mutations are in cis or in trans with each other?","Is there any normal RNA-seq data in cBioPortal?","Is there microRNA data?","OncoPrint","Other pages","Protein","Results View","RNA","Study View","TCGA","The data today is different than the last time i looked. What happened?","What are mRNA and microRNA Z-Scores?","What are OncoPrints?","What are TCGA Firehose Legacy datasets and how do they compare to the publication-associated datasets and the PanCancer Atlas datasets?","What are the statistical significance tests in Group Comparison?","What are the values of the box and whiskers in a boxplot?","What data types are in the portal?","What do “Amplification”, “Gain”, “Deep Deletion”, “Shallow Deletion” and \"-2\", \"-1\", \"0\", \"1\", and \"2\" mean in the copy-number data?","What does ___ stand for?","What happened to TCGA Provisional datasets?","What if I have other questions or comments?","What is a combined Study?","What is a Virtual Study?","What is GISTIC? What is RAE?","What is Group Comparison?","What is the cBioPortal for Cancer Genomics?","What is the difference between a “splice site” mutation and a “splice region” mutation?","What is the meaning of OS_STATUS / OS_MONTHS, and PFS_STATUS / PFS_MONTHS?","What is the process of data curation?","What kind of clinical data is stored in the portal?","What processing or filtering is applied to generate the mutation data?","What transcripts are used for annotating mutations?","What version of the human reference genome is being used in cBioPortal?","Where do the thresholded copy number call in TCGA Firehose Legacy data come from?","Which methylation probe is used for genes with multiple probes?","Which resources are integrated for variant annotation?","Which studies have MutSig and GISTIC results? 
How do these results compare to the data in the TCGA publications?","Why are some samples “Not profiled” for certain genes?","Why isn’t there protein data for my gene of interest?"]},{"l":"General Questions"},{"i":"what-is-the-cbioportal-for-cancer-genomics","l":"What is the cBioPortal for Cancer Genomics?","p":["The cBioPortal for Cancer Genomics is an open-access, open-source resource for interactive exploration of multidimensional cancer genomics data sets. The goal of cBioPortal is to significantly lower the barriers between complex genomic data and cancer researchers by providing rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects, and therefore to empower researchers to translate these rich data sets into biologic insights and clinical applications."]},{"i":"how-do-i-get-started","l":"How do I get started?","p":["Check out our tutorial slides to get started or go through our tutorial paper."]},{"i":"what-data-types-are-in-the-portal","l":"What data types are in the portal?","p":["The portal supports and stores non-synonymous mutations, DNA copy-number data (putative, discrete values per gene, e.g. \"deeply deleted\" or \"amplified\", as well as log2 or linear copy number data), mRNA and microRNA expression data, protein-level and phosphoprotein level data (RPPA or mass spectrometry based), DNA methylation data, and de-identified clinical data. For a complete breakdown of available data types per cancer study go to the Data Sets Page. Note that for many studies, only somatic mutation data and limited clinical data are available. For TCGA studies, the other data types are also available. 
Germline mutations are supported by cBioPortal, but are, with a few exceptions, not available in the public instance."]},{"i":"what-does-___-stand-for","l":"What does ___ stand for?","p":["Here are the meanings of some of the abbreviations used by cBioPortal:","VUS: variant of unknown significance","CNA: copy number alteration","AMP: amplification","HOMDEL: deep deletion","TMB: tumor mutational burden, calculated as mutations per megabase of sequenced DNA","KM: Kaplan-Meier","MSI: microsatellite instability","OQL: Onco Query Language, used within cBioPortal to define the types of alterations included in a query. For more on OQL, review the documentation, tutorial slides, and videos"]},{"i":"what-is-the-process-of-data-curation","l":"What is the process of data curation?","p":["The TCGA firehose legacy datasets are imported directly from the original TCGA Data Coordinating Center via the Broad Firehose.","We are also actively curating datasets from the literature. Studies from the literature were curated from the data published with the manuscripts. We sometimes reach out to the investigators to acquire additional data, such as clinical attributes. All mutation calls (in VCF or MAF format) are processed through an internal pipeline to annotate the variant effects in a consistent way across studies. Please contact us to suggest additional public datasets to curate or view the list of studies suggested for curation in our Datahub on Github."]},{"i":"how-do-i-get-updates-on-new-portal-developments-and-new-data-sets","l":"How do I get updates on new portal developments and new data sets?","p":["Please subscribe to our low-volume news mailing list or follow @cbioportal on Twitter."]},{"i":"does-the-portal-work-on-all-browsers-and-operating-systems","l":"Does the portal work on all browsers and operating systems?","p":["We support and test on the following web browsers: Safari, Google Chrome, Firefox and Edge. 
(As of release v3.5.4 we no longer support Internet Explorer 11). If you notice any incompatibilities, please let us know."]},{"i":"how-do-i-cite-the-cbioportal","l":"How do I cite the cBioPortal?","p":["Please cite the following portal papers:","Cerami et al. The cBio Cancer Genomics Portal: An Open Platform for Exploring Multidimensional Cancer Genomics Data. Cancer Discovery. May 2012 2; 401. PubMed.","Gao et al. Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal. Sci. Signal. 6, pl1 (2013). PubMed.","de Bruijn et al. Analysis and Visualization of Longitudinal Genomic and Clinical Data from the AACR Project GENIE Biopharma Collaborative in cBioPortal. Cancer Res (2023). PubMed.","Remember also to cite the source of the data if you are using a publicly available dataset."]},{"i":"can-i-use-figures-from-the-cbioportal-in-my-publications-or-presentations","l":"Can I use figures from the cBioPortal in my publications or presentations?","p":["Yes, you are free to use any of the figures from the portal in your publications or presentations (many are available in SVG or PDF format for easier scaling and editing). When you do, please cite Cerami et al., Cancer Discov. 2012, and Gao et al., Sci. Signal. 2013 (see the previous question for full citations)."]},{"i":"can-i-save-or-bookmark-my-results-in-cbioportal","l":"Can I save or bookmark my results in cBioPortal?","p":["You can bookmark your query results and share the URL with collaborators. We store all queries via Session IDs, and these are saved indefinitely. 
Use the bookmark tab to retrieve the full link, or generate a short link via the bit.ly link generator."]},{"i":"how-is-the-cbioportal-for-cancer-genomics-different-from-the-genomic-data-commons-gdc","l":"How is the cBioPortal for Cancer Genomics different from the Genomic Data Commons (GDC)?","p":["The cBioPortal is an exploratory analysis tool for exploring large-scale cancer genomic data sets that hosts data from large consortium efforts, like TCGA and TARGET, as well as publications from individual labs. You can quickly view genomic alterations across a set of patients, across a set of cancer types, perform survival analysis and perform group comparisons. If you want to explore specific genes or a pathway of interest in one or more cancer types, the cBioPortal is probably where you want to start.","By contrast, the Genomic Data Commons (GDC) aims to be the definitive place for full-download and access to all data generated by TCGA and TARGET. If you want to download raw mRNA expression files or full segmented copy number files, the GDC is probably where you want to start."]},{"i":"does-the-cbioportal-provide-a-web-service-api-r-interface-matlab-interface","l":"Does the cBioPortal provide a Web Service API? R interface? MATLAB interface?","p":["Yes, the cBioPortal provides a Swagger API, and R/MATLAB interfaces."]},{"i":"can-i-use-cbioportal-with-my-own-data","l":"Can I use cBioPortal with my own data?","p":["cBioPortal provides several options for analyzing your own data. Visit our Visualize Your Data page to generate an OncoPrint or Lollipop Plot with your own data. To utilize the entire suite of analysis and visualization tools, you can also install your own instance of cBioPortal (see next question)."]},{"i":"can-i-create-a-local-instance-of-cbioportal-to-host-my-own-data","l":"Can I create a local instance of cBioPortal to host my own data?","p":["Yes, the cBioPortal is open-source, and available on GitHub. 
Our documentation provides complete download and installation instructions."]},{"i":"id-like-to-contribute-code-to-the-cbioportal-how-do-i-get-started","l":"I'd like to contribute code to the cBioPortal. How do I get started?","p":["Great! We would love to have your contributions. To get started, head over to our GitHub repository and check out our page on how to contribute."]},{"i":"what-is-a-combined-study","l":"What is a combined Study?","p":["A combined study is a custom study comprised of samples from multiple studies. The combined study feature enables you to combine samples from multiple studies to form a bigger study. This cohort of samples can then be queried or explored just like a traditional study, and can be returned to at a later date or shared with a collaborator."]},{"i":"how-can-i-form-a-combined-study","l":"How can I form a combined Study?","p":["A combined or merged study is a custom study comprised of samples from multiple studies. In the homepage of cbioportal, studies can be selected using the checkbox located on the left of the study. Once the studies are selected, they can be combined and explored using the \"Explore Selected Studies\" button. Alternatively, after the studies are selected, you can run queries on the combined study using the \"Query by Gene\" button."]},{"i":"how-can-i-create-a-subset-or-sub-cohort-of-a-study-with-specific-samples-or-patients","l":"How can I create a subset or sub cohort of a study with specific samples or patients?","p":["A subset or sub cohort of a study can be created by specifying individual patients or samples. After a study is selected, user can click on the \"Custom selection\" button to create a new filter by specifying the sampleID or patientID that the user is interested to explore. 
Another way is to filter a set of patients using the charts on the study view and then view the IDs of the patients and samples that were selected or unselected based on the current filter."]},{"i":"what-is-a-virtual-study","l":"What is a Virtual Study?","p":["A virtual study is a custom study comprised of samples from one or more existing studies. The virtual study feature allows you to define a custom cohort of samples that fit your specific genomic or clinical criteria of interest. These samples can be a subset of the data available in an existing study, or result from the combination of multiple existing studies. This cohort of samples can then be queried or explored just like a traditional study, and can be returned to at a later date or shared with a collaborator. For more information and examples, see our tutorial on virtual studies."]},{"i":"is-it-necessary-to-log-in-to-use-virtual-studies-if-i-do-log-in-what-additional-functionality-do-i-gain","l":"Is it necessary to log in to use virtual studies? If I do log in, what additional functionality do I gain?","p":["No. A user that has not logged in can create virtual studies and run queries in those studies (by using the query box on the study summary page). Links to virtual studies are permanent, so you can save the link on your computer and come back to it anytime, or share it with others.","If you log in, you gain the ability to save your virtual study to the list of existing studies on the homepage. This makes a virtual study functionally the same as any other study: you can access your virtual studies in the query builder and you can combine an existing virtual study with any other study to create a new virtual study."]},{"i":"what-is-group-comparison","l":"What is Group Comparison?","p":["Group Comparison is a suite of analysis features which allows a user to compare clinical or genomic features of user-defined groups of samples. These groups can be defined based on any clinical or genomic features. 
For an overview, see our tutorial on group comparison."]},{"i":"what-are-the-statistical-significance-tests-in-group-comparison","l":"What are the statistical significance tests in Group Comparison?","p":["Survival tab: Log-rank test","Clinical tab:","Continuous data: Kruskal Wallis test","Categorical data: Chi-squared test","Other tabs","2 groups","Continuous data: one-sided t-test","Categorical data: two-sided Fisher's exact test","3 or more groups","Continuous data: one-way ANOVA","Categorical data: Chi-squared test"]},{"l":"Data Questions"},{"l":"General Data"},{"i":"does-the-portal-contain-cancer-study-x","l":"Does the portal contain cancer study X?","p":["Check out the Data Sets Page for the complete set of cancer studies currently stored in the portal. If you do not see your specific cancer study of interest, please contact us, and we will let you know if it's in the queue."]},{"i":"which-resources-are-integrated-for-variant-annotation","l":"Which resources are integrated for variant annotation?","p":["cBioPortal supports the annotation of variants from several different databases. These databases provide information about the recurrence of, or prior knowledge about, specific amino acid changes. For each variant, the number of occurrences of mutations at the same amino acid position present in the COSMIC database are reported. Furthermore, variants are annotated as “hotspots” if the amino acid positions were found to be recurrent linear hotspots, as defined by the Cancer Hotspots method ( cancerhotspots.org), or three-dimensional hotspots, as defined by 3D Hotspots ( 3dhotspots.org). Prior knowledge about variants, including clinical actionability information, is provided from three different sources: OncoKB ( www.oncokb.org), CIViC ( civicdb.org), as well as My Cancer Genome ( mycancergenome.org). 
For OncoKB, exact levels of clinical actionability are displayed in cBioPortal, as defined by the OncoKB paper."]},{"i":"what-version-of-the-human-reference-genome-is-being-used-in-cbioportal","l":"What version of the human reference genome is being used in cBioPortal?","p":["The public cBioPortal is currently using hg19/GRCh37."]},{"i":"how-does-cbioportal-handle-duplicate-samples-or-sample-ids-across-different-studies","l":"How does cBioPortal handle duplicate samples or sample IDs across different studies?","p":["The cBioPortal generally assumes that samples or patients that have the same ID are actually the same. This is important for cross-cancer queries, where each sample should only be counted once. If a sample is part of multiple cancer cohorts, its alterations are only counted once in the Mutations tab (it will be listed multiple times in the table, but is only counted once in the lollipop plot). However, other tabs (including OncoPrint and Cancer Types Summary) will count the sample twice - for this reason, we advise against querying multiple studies that contain the same samples (e.g., TCGA PanCancer Atlas and TCGA Firehose Legacy)."]},{"i":"are-there-any-normal-tissue-samples-available-through-cbioportal","l":"Are there any normal tissue samples available through cBioPortal?","p":["No, we currently do not store any normal tissue data in our system."]},{"i":"how-can-i-find-which-studies-have-mrna-expression-data-or-any-other-specific-data-type","l":"How can I find which studies have mRNA expression data (or any other specific data type)?","p":["Check out the Data Sets Page where you can view the complete set of cancer studies and sort by the number of samples with data available for any data type."]},{"i":"can-i-download-all-data-at-once","l":"Can I download all data at once?","p":["You can download all data for individual studies on the Data Sets Page or the study view page for the study of interest. 
You can also download all studies from our Data Hub."]},{"i":"the-data-today-is-different-than-the-last-time-i-looked-what-happened","l":"The data today is different than the last time I looked. What happened?","p":["We do occasionally update existing datasets to provide the most up-to-date, accurate and consistent data possible. The data you see today is likely an improved version of what you have seen previously. However, if you suspect that there is an error in the current version, please let us know at cbioportal@googlegroups.com.","If you need to reference an old version of a dataset, you can find previous versions in our Datahub repository."]},{"i":"how-do-i-access-data-from-aacr-project-genie","l":"How do I access data from AACR Project GENIE?","p":["Data from AACR Project GENIE are provided in a dedicated instance of cBioPortal. You can also download GENIE data from the Synapse Platform. Note that you will need to register before accessing the data. Additional information about AACR Project GENIE can be found on the AACR website."]},{"l":"TCGA"},{"i":"how-does-tcga-data-in-cbioportal-compare-to-tcga-data-in-genome-data-commons","l":"How does TCGA data in cBioPortal compare to TCGA data in Genome Data Commons?","p":["We do not currently load the mutation data from the GDC. Instead, we have the original mutation data generated by the individual TCGA sequencing centers. The source of the data is the Broad Firehose (or the publication pages for data that matches a specific manuscript). These data are usually a combination of two mutation callers, but they differ by center (typically a variant caller like MuTect plus an indel caller), and sequencing centers have modified their mutation calling pipelines over time."]},{"i":"what-happened-to-tcga-provisional-datasets","l":"What happened to TCGA Provisional datasets?","p":["We renamed TCGA Provisional datasets to TCGA Firehose Legacy to better reflect that this data comes from a legacy processing pipeline. 
The exact same data is now available in TCGA Firehose Legacy studies."]},{"i":"what-are-tcga-firehose-legacy-datasets-and-how-do-they-compare-to-the-publication-associated-datasets-and-the-pancancer-atlas-datasets","l":"What are TCGA Firehose Legacy datasets and how do they compare to the publication-associated datasets and the PanCancer Atlas datasets?","p":["The Firehose Legacy dataset (formerly Provisional datasets) for each TCGA cancer type contains all data available from the Broad Firehose. The publication datasets reflect the data that were used for each of the publications. The samples in a published dataset are usually a subset of the firehose legacy dataset, since manuscripts were often written before TCGA completed their goal of sequencing 500 tumors.","There can be differences between firehose legacy and published data. For example, the mutation data in the publication usually underwent more QC, and false positives might have been removed or, in rare cases, false negatives added. RNA-Seq and copy-number values may also differ slightly, as different versions of analysis pipelines could have been used. Additionally, due to additional curation during the publication process, the clinical data for the publication may be of higher quality or may contain a few more data elements, sometimes derived from the genomic data (e.g., genomic subtypes).","The TCGA PanCancer Atlas datasets derive from an effort to unify TCGA data across all tumor types. Publications resulting from this effort can be found at the TCGA PanCancer Atlas site. 
In the cBioPortal, data from the PanCancer Atlas is divided by tumor type, but these studies have uniform clinical elements, consistent processing and normalization of mutations, copy number, mRNA data and are ideally processed for comparative analyses."]},{"i":"where-do-the-thresholded-copy-number-call-in-tcga-firehose-legacy-data-come-from","l":"Where do the thresholded copy number call in TCGA Firehose Legacy data come from?","p":["Thresholded copy number calls in the TCGA Firehose Legacy datasets are generated by the GISTIC 2.0 algorithm and obtained from the Broad Firehose."]},{"i":"which-studies-have-mutsig-and-gistic-results-how-do-these-results-compare-to-the-data-in-the-tcga-publications","l":"Which studies have MutSig and GISTIC results? How do these results compare to the data in the TCGA publications?","p":["MutSig and GISTIC results about the statistical significance of recurrence of mutations and copy-number alterations in specific genes are available for many TCGA studies. The MutSig and GISTIC results reported in cBioPortal are based on the same mutations and copy number data reported in each TCGA publication, or the Broad Firehose for the firehose legacy data sets. However, the publication may or may not have included the complete MutSig and GISTIC output, and therefore there may be some discrepancies between the publication and the data in cBioPortal."]},{"i":"how-can-i-download-the-pancancer-atlas-data","l":"How can I download the PanCancer Atlas data?","p":["PanCancer Atlas data can be downloaded on a study-by-study basis from cBioPortal through the Datasets page or our DataHub. 
To download all cancer types together, try the Genomic Data Commons PanCancer Atlas page."]},{"i":"dna-mutations-copy-number--fusions","l":"DNA (Mutations, Copy Number & Fusions)"},{"i":"does-the-cbioportal-contain-synonymous-mutation-data","l":"Does the cBioPortal contain synonymous mutation data?","p":["No, the cBioPortal does not currently support synonymous mutations. This may change in the future, but we have no plans yet to add this feature."]},{"i":"what-processing-or-filtering-is-applied-to-generate-the-mutation-data","l":"What processing or filtering is applied to generate the mutation data?","p":["Within cBioPortal, we utilize the mutation calls as provided by each publication. We do not perform any additional filtering. The only processing we do is to standardize the annotation of the mutations using Genome Nexus(which utilizes VEP with the canonical MSKCC transcript). Read more about the transcript assignments here. For specifics of which tools were used to call mutations and filters that may have been applied, refer to the publication manuscript."]},{"i":"what-transcripts-are-used-for-annotating-mutations","l":"What transcripts are used for annotating mutations?","p":["Prior to loading a study into cBioPortal, we run all mutation data through a standard pipeline (see above), which re-annotates all mutations to the canonical MSKCC transcript. Read more about the transcript assignments here."]},{"i":"how-are-protein-domains-in-the-mutational-lollipop-diagrams-specified","l":"How are protein domains in the mutational lollipop diagrams specified?","p":["Protein domain definitions come from PFAM."]},{"i":"what-is-the-difference-between-a-splice-site-mutation-and-a-splice-region-mutation","l":"What is the difference between a “splice site” mutation and a “splice region” mutation?","p":["A “splice site” mutation occurs in an intron, in a splice acceptor or donor site (2bp into an intron adjacent to the intron/exon junction), defined by Sequence Ontology. 
“Splice region” mutations are mutations that occur near the intron/exon junction, defined by Sequence Ontology. While synonymous mutations are generally excluded from cBioPortal, these “splice region” synonymous mutations are included due to their potential impact on splicing."]},{"i":"what-do-amplification-gain-deep-deletion-shallow-deletion-and--2--1-0-1-and-2-mean-in-the-copy-number-data","l":"What do “Amplification”, “Gain”, “Deep Deletion”, “Shallow Deletion” and \"-2\", \"-1\", \"0\", \"1\", and \"2\" mean in the copy-number data?","p":["These levels are derived from copy-number analysis algorithms like GISTIC or RAE, and indicate the copy-number level per gene:","-2 or Deep Deletion indicates a deep loss, possibly a homozygous deletion","-1 or Shallow Deletion indicates a shallow loss, possibly a heterozygous deletion","0 is diploid","1 or Gain indicates a low-level gain (a few additional copies, often broad)","2 or Amplification indicates a high-level amplification (more copies, often focal)","Note that these calls are putative. We consider the deep deletions and amplifications as biologically relevant for individual genes by default. Note that these calls are usually not manually reviewed, and due to differences in purity and ploidy between samples, there may be false positives and false negatives."]},{"i":"what-is-gistic-what-is-rae","l":"What is GISTIC? What is RAE?","p":["Copy number data sets within the portal are often generated by the GISTIC or RAE algorithms. Both algorithms attempt to identify significantly altered regions of amplification or deletion across sets of patients. 
Both algorithms also generate putative gene/patient copy number specific calls, which are then input into the portal.","For TCGA studies, the table in allthresholded.bygenes.txt (which is the part of the GISTIC output that is used to determine the copy-number status of each gene in each sample in cBioPortal) is obtained by applying both low- and high-level thresholds to the gene copy levels of all the samples. The entries with value +/- 2 exceed the high-level thresholds for amplifications/deep deletions, and those with +/- 1 exceed the low-level thresholds but not the high-level thresholds. The low-level thresholds are just the 'ampthresh' and 'delthresh' noise threshold input values to GISTIC (typically 0.1 or 0.3) and are the same for every sample.","By contrast, the high-level thresholds are calculated on a sample-by-sample basis and are based on the maximum (or minimum) median arm-level amplification (or deletion) copy number found in the sample. The idea, for deletions anyway, is that this level is a good approximation for hemizygous losses given the purity and ploidy of the sample. The actual cutoffs used for each sample can be found in a table in the output file sample_cutoffs.txt. All GISTIC output files for TCGA are available at: gdac.broadinstitute.org."]},{"l":"RNA"},{"i":"does-the-portal-store-raw-or-probe-level-data","l":"Does the portal store raw or probe-level data?","p":["No, the portal only contains gene-level data. Data for different isoforms of a given gene are merged. Raw and probe-level data for data sets are available via NCBI GEO, dbGaP or through the GDC. 
See the cancer type description on the main query page or refer to the original publication for links to the raw data."]},{"i":"what-are-mrna-and-microrna-z-scores","l":"What are mRNA and microRNA Z-Scores?","p":["For mRNA and microRNA expression data, we typically compute the relative expression of an individual gene in a tumor sample to the gene's expression distribution in a reference population of samples. That reference population is all profiled samples (by default for mRNA), or normal samples (when specified), or all samples that are diploid for the gene in question (discontinued). The returned value indicates the number of standard deviations away from the mean of expression in the reference population (Z-score). The normalization method is described here. Please note that the expression results by querying a gene with the default setting (z-score threshold of 2) oftentimes are not meaningful. Since the z-scores were usually calculated compared to other tumor samples, high or low expression does not necessarily mean that the gene is expressed irregularly in tumors. The data is useful for correlation analysis, for example, pick a threshold based on overall expression (using Plots tab) and compare survival data between expression high and low groups."]},{"i":"is-there-any-normal-rna-seq-data-in-cbioportal","l":"Is there any normal RNA-seq data in cBioPortal?","p":["We have RNASeqV2 mRNA expression data for normal samples of 16 TCGA PanCan Atlas Cohorts. The data was curated from GDC, and can be downloaded from our Datahub or Data Set page. This data is not directly queriable in portal; they are only used as reference data for calculating the \"relative to normal expression z-score\" profile. Example: ERBB2 expression z-scores relative to normal expression."]},{"i":"how-is-tcga-rnaseqv2-processed-what-units-are-used","l":"How is TCGA RNASeqV2 processed? What units are used?","p":["RNASeqV2 from TCGA is processed and normalized using RSEM. 
Specifically, the RNASeq V2 data in cBioPortal corresponds to the rsem.genes.normalized_results file from TCGA. A more detailed explanation of RSEM output can be found here. cBioPortal then calculates z-scores as described above in What are mRNA and microRNA Z-Scores?"]},{"i":"is-there-microrna-data","l":"Is there microRNA data?","p":["We have microRNA data for only a few studies and they are not up to date. To download more updated miRNA data, please go to either Broad Firehose, or GDC."]},{"i":"how-can-i-query-micrornas-in-the-portal","l":"How can I query microRNAs in the portal?","p":["You can input either precursor or mature miRNA IDs. Since one precursor ID may correspond to multiple mature IDs and vice versa, the portal creates one internal ID for each pair of precursor ID and mature ID mapping. For example, an internal ID of MIR-29B-1/29B stands for precursor microRNA hsa-mir-29b-1 and mature microRNA hsa-miR-29b. After entering a precursor or mature ID, you will be asked to select one internal ID for query and that internal ID will also be displayed in the Oncoprint."]},{"l":"Protein"},{"i":"how-can-i-query-phosphoprotein-levels-in-the-portal","l":"How can I query phosphoprotein levels in the portal?","p":["You need to input special IDs for each phosphoprotein/phosphosite such as AKT1_pS473 (which means AKT1 protein phosphorylated at serine residue at position 473). You could also input aliases such as phosphoAKT1 or phosphoprotein, and the portal will ask you to select the phosphoprotein/phosphosite of your interest. Note that phosphoprotein data is only available for select studies and for a limited number of proteins / phosphorylation sites."]},{"i":"why-isnt-there-protein-data-for-my-gene-of-interest","l":"Why isn’t there protein data for my gene of interest?","p":["Most of the protein expression data in cBioPortal comes from assays like RPPA which only interrogate a subset of all proteins. 
TCGA ovarian, breast, and colorectal firehose legacy studies also have mass-spectrometry-based proteomics data from CPTAC which cover more genes/proteins."]},{"l":"DNA Methylation"},{"i":"which-methylation-probe-is-used-for-genes-with-multiple-probes","l":"Which methylation probe is used for genes with multiple probes?","p":["For genes with multiple probes (usually from the Infinium arrays), we only include methylation data from the probe with the strongest negative correlation between the methylation signal and the gene's expression in the study (TCGA only)."]},{"l":"Clinical Data"},{"i":"what-kind-of-clinical-data-is-stored-in-the-portal","l":"What kind of clinical data is stored in the portal?","p":["The portal currently stores de-identified clinical data, such as gender, age, tumor type, tumor grade, overall and disease-free survival data, when available. The available clinical data will differ from study to study."]},{"i":"what-is-the-meaning-of-os_status--os_months-and-pfs_status--pfs_months","l":"What is the meaning of OS_STATUS / OS_MONTHS, and PFS_STATUS / PFS_MONTHS?","p":["OS_STATUS means overall survival status (\"0\" -> \"living\" or \"1\" -> \"deceased\") and OS_MONTHS indicates the number of months from time of diagnosis to time of death or last follow up. PFS refers to “progression free survival”, indicating whether patient’s disease has recurred/progressed (PFS_STATUS), and at what time the disease recurred or the patient was last seen (PFS_MONTHS)."]},{"l":"Analysis Questions"},{"i":"how-can-i-queryexplore-a-select-subset-of-samples","l":"How can I query/explore a select subset of samples?","p":["cBioPortal allows you to run a query or explore study view using a user-specified list of samples/patients.","The first step is to define your sample set. 
There are two slightly different approaches you can take to defining your sample set, depending on whether you are selecting based on a positive criteria (samples with TP53 mutations) or a negative criteria (samples without a KRAS mutation).","Let’s take the positive criteria example first. Run a query for TP53 mutations using OQL (TP53: MUT) in your study of interest. Click over to the “Download” tab. In the table at the top, find the row that starts with “Samples affected”, and either Copy or Download that list. This is your list of samples that have a TP53 mutation.","Now for the negative criteria example. This also begins by using OQL to run a query for KRAS mutations (KRAS: MUT) in your study of interest. Click over to the “Download” tab. Look at the table at the top again, but this time find the row that starts with “Sample matrix”. Copy or download this data and open it in Excel. You will see a two column table that indicates whether a given sample is altered or not, indicated by 0 or 1. Sort by the second column and then copy all the sample IDs from the first column that have a 0 in the second column. This is your list of samples that do not have a KRAS mutation.","With a sample list in hand, you can now either run a query in just the selected samples (select “User-defined Case List” in the “Select Patient/Case Set:” dropdown) or explore this set of patients in study view (click “Select cases by IDs” and then create a Virtual Study restricted to just those samples).","For more information about OQL, see the specification page or view the tutorial slides. For more information about virtual studies, read this FAQ or view the tutorial slides."]},{"i":"how-can-i-compare-two-or-more-subsets-of-samples","l":"How can I compare two or more subsets of samples?","p":["cBioPortal has a suite of analysis tools to enable comparisons between user-defined groups of samples/patients. 
For an overview of this functionality, see our tutorial on group comparison."]},{"i":"is-it-possible-to-determine-if-a-particular-mutation-is-heterozygous-or-homozygous-in-a-sample-when-a-sample-has-2-mutations-in-one-gene-is-it-possible-to-determine-whether-the-mutations-are-in-cis-or-in-trans-with-each-other","l":"Is it possible to determine if a particular mutation is heterozygous or homozygous in a sample? When a sample has 2 mutations in one gene, is it possible to determine whether the mutations are in cis or in trans with each other?","p":["There is currently no way to definitively determine whether a mutation is heterozygous/homozygous or in cis/trans with another mutation. However, you can try to infer the status of mutations by noting the copy number status of the gene and the variant allele frequency of the mutation(s) of interest relative to other mutations in the same sample. The cBioPortal patient/sample view can help you accomplish this.","Specifically in the case of TCGA samples with two mutations in the same gene, you can also obtain access to the aligned sequencing reads from the GDC and check if the mutations are in cis or in trans (if the mutations are close enough to each other)."]},{"i":"how-can-i-query-overunder-expression-of-a-gene","l":"How can I query over/under expression of a gene?","p":["cBioPortal supports Onco Query Language (OQL) which can be used to query over/under expression of a gene. When writing a query, select an mRNA expression profile. By default, samples with expression z-scores >2 or <-2 in any queried genes are considered altered. Alternate cut-offs can be defined using OQL, for example: \"EGFR: EXP>2\" will query for samples with an EGFR expression z-score >2. 
Review the OQL specification page or tutorial slides for more specifics and examples."]},{"i":"how-can-i-compare-outcomes-in-patients-with-high-vs-low-expression-of-a-gene","l":"How can I compare outcomes in patients with high vs low expression of a gene?","p":["To compare outcomes in patients with high vs low expression of a gene (excluding those patients with intermediate levels of expression), we will follow a 2 step process that builds on the approach described above in How can I query/explore a select subset of samples?, utilizing OQL to first identify and then stratify the cases of interest.","First, identify the sample set using OQL. For example, to stratify patients based on expression of EGFR, add an mRNA profile to the query, and write \"EGFR: EXP>2 EXP<-2\" in the gene set box. After running the query, go to the Download tab and copy/download the “Samples affected” list.","Second, return to the homepage and paste the list of sample IDs from the previous step into the “User-defined Case List” in the “Select Patient/Case Set:” dropdown. This query will now only look at samples with high or low expression. To now stratify into high vs low for survival analysis, enter \"EGFR: EXP>2\" in the gene set box (don’t forget to select the same mRNA profile). Run the query and click over to the Survival tab. The “cases with alteration” are patients with high expression of EGFR and the cases without alteration are those with low expression of EGFR.","We use 2 and -2 as example thresholds above, but it is also a good idea to look at the distribution of expression data and select a threshold based on that. Plots tab can be useful for analyzing the expression distribution."]},{"l":"Results View"},{"l":"OncoPrint"},{"i":"what-are-oncoprints","l":"What are OncoPrints?","p":["OncoPrints are compact means of visualizing distinct genomic alterations, including somatic mutations, copy number alterations, and mRNA expression changes across a set of cases. 
They are extremely useful for visualizing gene set and pathway alterations across a set of cases, and for visually identifying trends, such as trends in mutual exclusivity or co-occurrence between gene pairs within a gene set. Individual genes are represented as rows, and individual cases or patients are represented as columns.","image"]},{"i":"can-i-change-the-order-of-genes-in-the-oncoprint","l":"Can I change the order of genes in the OncoPrint?","p":["By default, the order of genes in the OncoPrint will be the same as in your query. You can change the order by (a) clicking on the gene name and dragging it up/down or (b) clicking on the three vertical dots next to the gene name to move the gene up/down."]},{"i":"can-i-visualize-my-own-data-within-an-oncoprint","l":"Can I visualize my own data within an OncoPrint?","p":["Yes, check out the OncoPrinter tool on our Visualize Your Data page."]},{"i":"why-are-some-samples-not-profiled-for-certain-genes","l":"Why are some samples “Not profiled” for certain genes?","p":["Some studies include data from one or more targeted sequencing platforms which do not include all genes. For samples sequenced on these smaller panels, cBioPortal will indicate that a particular gene was not included on the sequencing panel used for that sample. Alteration frequency calculations for each gene also take this information into account. Hover over a sample in OncoPrint to see the gene panel name, and click on that gene panel name to view a list of the genes included on that panel."]},{"l":"Other pages"},{"i":"does-the-mutual-exclusivity-tab-calculate-its-statistics-using-all-samplesalterations-or-only-a-specific-subset","l":"Does the Mutual Exclusivity tab calculate its statistics using all samples/alterations or only a specific subset?","p":["The calculations on the Mutual Exclusivity tab are performed using all samples included in the query. 
A sample is defined as altered or unaltered for each gene based on the OQL utilized in the query - by default, this will be non-synonymous mutations, fusions, amplifications and deep deletions."]},{"i":"what-are-the-values-of-the-box-and-whiskers-in-a-boxplot","l":"What are the values of the box and whiskers in a boxplot?","p":["In boxplots on cBioPortal, the box is drawn from the 25th percentile (Q1) to the 75th percentile (Q3), with the horizontal line in between representing the median. Whiskers are drawn independently above and below the box, and will extend to the maximum or minimum data values, unless there are outlier values, in which case the whisker will extend to 1.5 * IQR (interquartile range = Q3-Q1). Outliers are defined as values that extend beyond 1.5 * IQR."]},{"l":"Study View"},{"i":"how-to-use-filter-in-the-url-of-study-view-page","l":"How to use filter in the URL of Study View page?","p":["You can filter the study based on values of one attribute in the URL. For example, https://www.cbioportal.org/study/summary?id=msk_impact_2017#filterJson={clinicalDataFilters:[{attributeId:CANCER_TYPE,values:[{value:Melanoma}]}]}","filterJson is set in the url hash string. Here are the allowed parameters and format for it in filterJson:"]},{"i":"what-if-i-have-other-questions-or-comments","l":"What if I have other questions or comments?","p":["Please contact us at cbioportal@googlegroups.com. 
Previous discussions about cBioPortal are available on the user discussion mailing list."]}],[{"l":"Overview"},{"l":"Overview of Resources"},{"l":"Tutorial Slides","p":["These tutorial slides contain annotated screenshots to walk you through using the cBioPortal site.","Single Study Exploration Google slides| PDF","Single Study Query Google slides| PDF","Patient View Google slides| PDF","Virtual Studies Google slides| PDF","Onco Query Language (OQL) Google slides| PDF","Group Comparison Google slides| PDF","Pathways Google slides| PDF"]},{"l":"Webinar Recordings","p":["Recordings of live webinars from April & May 2020","Introduction to cBioPortal youtube.com| bilibili.com | Download PDF | View slides","Mutation Details & Patient View youtube.com| bilibili.com | Download PDF | View slides","Expression Data Analysis youtube.com| bilibili.com | Download PDF | View slides","Group Comparison youtube.com| bilibili.com | Download PDF | View slides","API & R Client youtube.com| bilibili.com | Download PDF | View slides | Workshop code"]},{"l":"How-To Videos","p":["Short videos that show how to perform specific analyses or how to use specific pages.","Comparing samples based on expression level of a gene youtube.com","Proteomic profiles in cBioPortal - An example based on cancer cell lines from the Cancer Cell Line Encyclopedia (CCLE) youtube.com","Filtering and adding clinical data to Mutations tab youtube.com","Exploring the longitudinal evolution of individual patients youtube.com","Using Onco Query Language (OQL) to query based on the expression level of genes youtube.com","How to explore the data in a study youtube.com","How to run a query for genes of interest youtube.com","How to download data youtube.com","Navigating AACR GENIE - Biopharma Collaborative (BPC) dataset youtube.com"]},{"l":"Documentation","p":["Frequently Asked Questions FAQ","Onco Query Language OQL"]},{"l":"Publications","p":["Cerami et al. Cancer Discovery 2012 PubMed","Gao et al.
Science Signaling 2013 PubMed"]},{"l":"Tutorials by others","p":["cBioPortal Tutorial Series by Jackson Laboratory youtube.com","Using the Cancer Digital Slide Archive in cBioPortal by Nicole M. Rivera Acevedo youtube.com (English)| youtube.com (Spanish)","Visualizing and Downloading RNASeq data from cBioPortal by Farhan Haq youtube.com"]}],[{"l":"By page"},{"l":"Resources by Page"},{"l":"Study View","p":["Tutorial Slides: Single Study Exploration Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","How-To Video: Comparing samples based on expression level of a gene youtube.com","Tutorial Slides: Virtual Studies Google slides| PDF"]},{"l":"Group Comparison","p":["Tutorial Slides: Group Comparison Google slides| PDF","Webinar: Group Comparison youtube.com| bilibili.com","How-To Video: Comparing samples based on expression level of a gene youtube.com"]},{"i":"running-a-query--results-view","l":"Running a Query / Results View"},{"l":"General","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"OQL","p":["Documentation OQL","Tutorial Slides: Onco Query Language (OQL) Google slides| PDF","Webinar: Expression Data Analysis youtube.com| bilibili.com","How-To Video: Using Onco Query Language (OQL) to query based on the expression level of genes youtube.com"]},{"l":"OncoPrint","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Cancer Types Summary","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Mutual Exclusivity","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Plots","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| 
bilibili.com"]},{"l":"Mutations","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Webinar: Mutation Details & Patient View youtube.com| bilibili.com","How-To Video: Filtering and adding clinical data to Mutations tab youtube.com"]},{"l":"Co-expression","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"i":"comparisonsurvival","l":"Comparison/Survival","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Tutorial Slides: Group Comparison Google slides| PDF","Webinar: Group Comparison youtube.com| bilibili.com"]},{"l":"CN Segments","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Pathways","p":["Tutorial Slides: Pathways Google slides| PDF","Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Downloads","p":["Tutorial Slides: Single Study Query Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com"]},{"l":"Patient View","p":["Tutorial Slides: Patient View Google slides| PDF","Tutorial Slides: Pathways Google slides| PDF","Webinar: Introduction to cBioPortal youtube.com| bilibili.com","Webinar: Mutation Details & Patient View youtube.com| bilibili.com","How-To Video: Exploring the longitudinal evolution of individual patients youtube.com"]}],[{"i":"onco-query-language-oql","l":"Onco Query Language (OQL)"},{"l":"Introduction to OQL","p":["The Onco Query Language (OQL) is used to define which specific types of alterations are included in a query on the cBioPortal for Cancer Genomics. By default, querying for a gene includes mutations, fusions, amplifications and deep deletions. OQL can be used to specify specific mutations (e.g. 
BRAF V600E) or types of mutations (e.g. BRCA1 truncating mutations), lower level copy number alterations (e.g. CDKN2A shallow deletions), changes in mRNA or protein expression, and more.","OQL-specified alterations will be reflected on most tabs, including OncoPrint, but are not currently reflected on the Plots, Co-Expression or Expression tabs.","Note that OQL assumes any word that it doesn't recognize is a mutation code.","Additional explanation and examples using OQL are available in the User Guide."]},{"l":"OQL Keywords","p":["* These are the default OQL keywords used for each data type when a gene is queried without any explicit OQL.","AMP Amplifications HOMDEL Deep Deletions GAIN Gains HETLOSS Shallow Deletions Comparison operators can also be used with CNA(e.g. CNA = GAIN is the same as AMP GAIN)","AMP HOMDEL","Copy Number Alterations","Data Type","Default*","EXP -x mRNA expression is less than x standard deviations (SD) below the mean EXP x mRNA expression is greater than x SD above the mean The comparison operators = and = also work","EXP = 2 EXP = -2","FUSION","FUSION All fusions (note that many studies lack fusion data)","Fusions","Keywords and Syntax","mRNA Expression","MUT","MUT All non-synonymous mutations MUT = protein change Specific amino acid changes (e.g. 
V600E or V600) MUT = mutation type Acceptable values are: MISSENSE, NONSENSE, NONSTART, NONSTOP, FRAMESHIFT, INFRAME, SPLICE, TRUNC","Mutations","PROT -x Protein expression is less than x standard deviations (SD) below the mean PROT x Protein expression is greater than x SD above the mean The comparison operators = and = also work","PROT = 2 PROT = -2","Protein/phosphoprotein level","Users can define specific subsets of genetic alterations for five data types:"]},{"l":"OQL modifiers","p":["Mutations and copy number alterations can be further refined using modifiers:","Keyword","Applicable Data Type","Explanation","DRIVER","Mutations Fusions Copy Number Alterations","Include only mutations, fusions and copy number alterations which are driver events, as defined in OncoPrint (default: OncoKB and CancerHotspots).","GERMLINE","Mutations","Include only mutations that are defined as germline events by the study.","SOMATIC","Include all mutations that are not defined as germline.","(a-b)(protein position range)","Include all mutations that overlap with the protein position range a-b, where a and b are integers. If you add a *(i.e. (a-b*)) then it will only include those mutations that are fully contained inside a-b. The open-ended ranges (a-) and (-b) are also allowed."]},{"l":"Basic Usage","p":["When querying a gene without providing any OQL specifications, cBioPortal will default to these OQL terms for a query with Mutation and Copy Number selected in the Genomic Profiles section: MUT FUSION AMP HOMDEL","image of basic query","You can see the OQL terms applied by hovering over the gene name in OncoPrint:","image of basic query oncoprint","If you select RNA and/or Protein in the \"Genomic Profiles\" section of the query, the default settings are:","RNA: EXP = 2 EXP = -2","Protein: PROT = 2 PROT = -2","image of exp prot query oncoprint","You must select the relevant Genomic Profile in order for OQL to query that data type. 
For example, you can't add EXP 2 to the query without also selecting an RNA profile.","Proper formatting for OQL is straightforward: gene name, followed by a colon, followed by any OQL keywords and ending in a semicolon, an end-of-line, or both.","In general, any combination of OQL keywords and/or expressions can annotate any gene, and the order of the keywords is immaterial.","Below we will go into greater detail about each data type."]},{"l":"Mutations","p":["For example, to view TP53 truncating mutations and in-frame insertions/deletions:","FRAMESHIFT","INFRAME","MISSENSE","mutation type can be one or more of:","NONSENSE","NONSTART","NONSTOP","Note that this will only work to exclude a single event. Because OQL uses 'OR' logic, excluding multiple mutations or excluding a mutation while including another mutation (e.g. BRAF: MUT=V600 MUT!=V600E) will result in querying all mutations.","OQL can also be used to exclude a specific protein change, position or type of mutation. For example, below are examples to query all EGFR mutations except T790M, all BRAF mutations except those at V600 and all TP53 mutations except missense:","OQL for mutations can also be written without MUT =. The following examples are identical:","Or all mutations of a specific type:","SPLICE","To view cases with specific mutations, provide the specific amino acid change of interest:","TRUNC","You can also view all mutations at a particular position:"]},{"l":"Copy Number Alterations","p":["To view cases with specific copy number alterations, provide the appropriate keywords for the copy number alterations of interest. For example, to see amplifications:","Or amplified and gained cases:","Which can also be written as:"]},{"l":"Expression","p":["High or low mRNA expression of a gene is determined by the number of standard deviations (SD) from the mean. 
For example, to see cases where mRNA for CCNE1 is greater than 3 SD above the mean:"]},{"l":"Protein","p":["High or low protein expression is similarly determined by the number of SD from the mean. For example, to see cases where protein expression is 2 SD above the mean:","Protein expression can also be queried at the phospho-protein level:"]},{"l":"Modifiers","p":["Modifiers can be used on their own or in combination with other OQL terms for mutations, fusions and copy number alterations to further refine the query. Modifiers can be combined with other OQL terms using an underscore. The order in which terms are combined is immaterial."]},{"l":"Driver","p":["The DRIVER modifier applies to mutations, fusions and copy number alterations. The definition of what qualifies as a driver alteration comes from the \"Mutation Color\" menu in OncoPrint. By default, drivers are defined as mutations, fusions and copy number alterations in OncoKB or CancerHotspots.","On its own, the DRIVER modifier includes driver mutations, fusions and copy number alterations:","Or it can be used in combination with another OQL term. For example, to see only driver fusion events:","Or driver missense mutations:","When combining DRIVER with another OQL term, the order doesn't matter: MUT_DRIVER and DRIVER_MUT are equivalent. DRIVER can be combined with:","MUT","MUT = mutation type or MUT = protein change","FUSION","CNA","AMP or GAIN or HETLOSS or HOMDEL","GERMLINE or SOMATIC(see below)"]},{"i":"germlinesomatic","l":"Germline/Somatic","p":["The GERMLINE and SOMATIC modifiers only apply to mutations. A mutation can be explicitly defined as germline during the data curation process. 
Note that very few studies on the public cBioPortal contain germline data.","GERMLINE or SOMATIC can be combined with:","MUT","MUT = mutation type or MUT = protein change","DRIVER","To see all germline BRCA1 mutations:","Or to see specifically truncating germline mutations:","The order is immaterial; both options produce identical results.","Or to see somatic missense mutations:","GERMLINE or SOMATIC can also be combined with DRIVER and, optionally, a more specific mutation term (e.g. NONSENSE):"]},{"l":"The DATATYPES Command","p":["To save copying and pasting, the DATATYPES command sets the genetic annotation for all subsequent genes. Thus,","is equivalent to:"]},{"l":"Merged Gene Tracks","p":["OQL can be used to create a merged gene track in OncoPrint, in which alterations in multiple genes appear as a single track. This is done by enclosing a list of genes in square brackets. By default, the track will be labeled by the gene names, separated by '/'. To instead specify a label, type the desired label within double quotes at the beginning of the square brackets. For example:","The resulting merged gene track will be visible in OncoPrint and can be expanded to view the individual gene tracks. For example:","Image of merged genes in OncoPrint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966e2e4b05228701f958e","It is possible to include OQL for specific alterations in merged gene tracks, as well as querying a combination of single and merged gene tracks.","Note that merged gene tracks only appear in OncoPrint. All other pages show the individual genes."]},{"i":"example-rb-pathway-alterations","l":"Example: RB Pathway Alterations","p":["Provided below is one example of the power of using OQL. 
Additional examples are available in the User Guide."]},{"l":"Using the Defaults","p":["Select Ovarian Serous Cystadenocarcinoma (TCGA, Nature 2011) with the following data types:","Mutations","Putative copy-number alterations (GISTIC)","mRNA expression (mRNA expression Z-scores (all genes))","Input the following three genes in the RB pathway:","CCNE1","RB1","CDKN2A","image of rb query","Submit this query and note how many samples have alterations in multiple of these genes:","image of rb oncoprint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966cee4b05228701f958d"]},{"l":"Greater Insight with OQL","p":["Given what is known about the RB pathway, the events that are most likely selected for in the tumors are CCNE1 amplification, RB1 deletions or mutations, and loss of expression of CDKN2A. To investigate this hypothesis, we can use OQL to display only these events. Modify the query to reflect this:","Examine the updated OncoPrint:","image of modified rb oncoprint","https://www.cbioportal.org/results/oncoprint?session_id=5c1966aee4b05228701f958c","This shows that alterations in these genes are almost entirely mutually-exclusive -- no cases are altered in all three genes and only six are altered in two genes. This supports the theory that the tumor has selected for these events."]},{"i":"questions-feedback","l":"Questions? 
Feedback?","p":["Please share any questions or feedback on OQL with us: https://groups.google.com/group/cbioportal","Also note that additional explanation and examples using OQL are available in the User Guide."]}],[{"l":"News"},{"i":"aug-21-2023","l":"Aug 21, 2023","p":["Added data consisting of 4,488 samples from 7 studies:","Lung Adenocarcinoma Met Organotropism (MSK, Cancer Cell 2023) 2653 samples","Acute Myeloid Leukemia (OHSU, Cancer Cell 2022) 942 samples","Colon Cancer (Sidra-LUMC AC-ICAM, Nat Med 2023) 348 samples","Pediatric Neuroblastoma (MSK, Nat Genet 2023) 223 samples","Colorectal Adenocarcinoma (MSK, Nat Commun 2022) 180 samples","Bladder Cancer (Columbia University/MSK, Cell 2018) 130 samples","Myoepithelial Carcinomas of Soft Tissue (WCM, CSH Molecular Case Studies 2022) 12 samples","Gene Tables Update Updated tables of genes (main and alias), based on Apr 1, 2023 HGNC release. See seedDB release note here for details."]},{"i":"aug-1-2023","l":"Aug 1, 2023","p":["Enhancement: One-sided Fisher's exact tests were changed to be two-sided. The affected pages are:","Results View Page - Mutual Exclusivity Tab","Results View Page - Comparison Tab - Genomic Alterations Tab","Comparison Page - Genomic Alterations Tab","Comparison Page - Mutations Tab","Please note that the Mutations tab on the Comparison page is a recent feature and was introduced with the two-sided Fisher's exact test already implemented.","Several users pointed out that using a one-sided test was incorrect for these comparisons. Please see discussions here for more information."]},{"i":"may-2-2023","l":"May 2, 2023","p":["New Feature: The mutations tab now shows variant annotations from the repository of Variant with Unexpected Effects (reVUE)."]},{"i":"apr-11-2023","l":"Apr 11, 2023","p":["New Feature: Disable autocommit and manually commit filters in study view. 
Manually committing filters can improve cBioPortal performance when querying large datasets."]},{"i":"apr-5-2023","l":"Apr 5, 2023","p":["Added data consisting of 2,472 samples from 5 studies:","Bladder Cancer (MSK, Cell Reports 2022) 1659 samples","Gastrointestinal Stromal Tumor (MSK, NPJ Precis Oncol 2023) 499 samples","Appendiceal Cancer (MSK, J Clin Oncol 2022) 273 samples","Colorectal Cancer (MSK, Cancer Discovery 2022) 22 samples","Nerve Sheath Tumors (Johns Hopkins, Sci Data 2020) 19 samples[First GRCh38 Study]","Data Improvement","Added TERT promoter mutation status to Melanomas (TCGA, Cell 2015), Papillary Thyroid Carcinoma (TCGA, Cell 2014) TCGA studies."]},{"i":"apr-4-2023","l":"Apr 4, 2023","p":["New Feature: Allow numeric data type for custom data charts.","This also allows to have numerical custom data after we query based on genes (custom data 2 in the image):"]},{"i":"jan-10-2023","l":"Jan 10, 2023","p":["New Feature: New Pathways tab on the Group Comparison view. Example: Primary vs Metastasis samples in MSK-IMPACT Clinical Sequencing Cohort"]},{"i":"dec-13-2022","l":"Dec 13, 2022","p":["New Feature: New Mutations tab on the Group Comparison view. Example: Primary vs Metastasis samples in MSK-IMPACT Clinical Sequencing Cohort"]},{"i":"oct-12-2022","l":"Oct 12, 2022","p":["Added data consisting of 1,459 samples from 10 studies:","Hepatocellular Carcinoma (MERiC/Basel, Nat Commun. 2022) 122 samples","Prostate Cancer Brain Metastases (Bern, Nat Commun. 2022) 168 samples","Pan-Cancer MSK-IMPACT MET Validation Cohort (MSK 2022) 69 samples","Endometrial Carcinoma cfDNA (MSK, Clin Cancer Res 2022) 44 samples","Endometrial Carcinoma MSI (MSK, Clin Cancer Res 2022) 181 samples","Gallbladder Cancer (MSK, Clin Cancer Res, 2022) 244 samples","Meningioma (University of Toronto, Nature 2021) 121 samples","Mixed Tumors: Selpercatinib RET Trial (MSK, Nat Commun.
2022) 188 samples","Low-Grade Serous Ovarian Cancer (MSK, Clin Cancer Res 2022) 119 samples","Urothelial Carcinoma (BCAN/HCRN 2022) 203 samples"]},{"i":"sep-6-2022","l":"Sep 6, 2022","p":["Enhancement: Oncoprint can now save clinical tracks after login"]},{"i":"aug-11-2022","l":"Aug 11, 2022","p":["New Major Release: v5.0.0 release drops support for fusions in the mutation data format. Going forward fusions can only be imported in the Structural Variant (SV) format. This is mainly a refactoring effort to simplify the codebase and pave the way for the development of novel structural variant visualizations in the future. For cBioPortal instance maintainer, please reference our Migration Guide for instruction."]},{"i":"jul-26-2022","l":"Jul 26, 2022","p":["Added data consisting of 6,631 samples from 7 studies:","Metastatic Biliary Tract Cancers (SUMMIT - Neratinib Basket Trial, 2022) 36 samples","Rectal Cancer (MSK, Nature Medicine 2022) 801 samples","Lung Adenocarcinoma (MSK Mind,Nature Cancer 2022) 247 samples","Myelodysplastic Syndromes (MDS IWG, IPSSM, NEJM Evidence 2022) 3,323 samples","Esophagogastric Cancer (MSK, Clin Cancer Res 2022) 237 samples","Pan-cancer Analysis of Advanced and Metastatic Tumors (BCGSC, Nature Cancer 2020) 570 samples","Prostate Adenocarcinoma (MSK, Clin Cancer Res. 2022) 1,417 samples"]},{"i":"may-31-2022","l":"May 31, 2022","p":["New Feature: Added Quartiles, Median split and Generate bins options for bar charts on the study view page, where Generate bins allows user to define bin size and min value"]},{"i":"may-12-2022","l":"May 12, 2022","p":["New Feature: Show cohort alteration frequencies in pathways from NDEx on the Results View. Example: Glioblastoma signaling pathways in MSK-IMPACT (2017) cohort"]},{"i":"may-5-2022","l":"May 5, 2022","p":["New Feature: View mutations and copy number changes in the Integrative Genomics Viewer (IGV) on the Patient View. 
Example: Endometrial cancer patient in TCGA","New Feature: Add charts that plot categorical vs continuous data on the Study View. Example: MSK-IMPACT (2017) cohort","New Feature: Several single cell data integrations are now available for the CPTAC glioblastoma study, allowing one to:","Compare genomic alterations and cell type fractions in oncoprints on the Results View( Example)","Explore the single cell data further in Vitessce on the Patient View( Example)","Create cohorts and groups based on cell type fractions on the Study View( Example)","Compare differences in cell type fractions between groups on the Comparison Page( Example)"]},{"i":"apr-20-2022","l":"Apr 20, 2022","p":["Added data consisting of 2,557 samples from 5 studies:","Breast Cancer (HTAN, 2022) 5 samples","Colorectal Cancer (MSK, 2022) 47 samples","Pediatric Pancan Tumors (MSK, 2022) 135 samples","Sarcoma (MSK, 2022) 2,138 samples","Lung Cancer in Never Smokers (NCI, Nature Genetics 2021) 232 samples","Gene Tables Update Updated tables of genes (main and alias), based on Jan 1, 2022 HGNC release. See seedDB release note here for details.","Data Improvement","Pan-can studies timeline addition: TREATMENT, OTHER MALIGNANCY FORM, SAMPLE ACQUISITION, STATUS are added to all 32 TCGA Pan-Can studies. Details for data source and transformation process can be found here or in the README.md files included in each study folder on datahub. Example: patient view of TCGA-A2-A04P in Breast Invasive Carcinoma Tumor Type","Pan-can studies methylation addition: methylation profile (27k and 450k merged) are added to all 32 TCGA Pan-Can studies, in generic assay format. Data source: GDC. 
Example: search by gene or probe from dropdown, to add a chart in study view, a track in Oncoprint (single study query only), or plots in plots tab.","Single cell (type fraction and phases) data (in generic assay format) is added to Glioblastoma (CPTAC, Cell 2021)"]},{"i":"jan-4-2022","l":"Jan 4, 2022","p":["Added data consisting of 27,447 samples from 10 studies:","Endometrial Carcinoma (CPTAC, Cell 2020) 95 samples","Pancreatic Ductal Adenocarcinoma (CPTAC, Cell 2021) 140 samples","Lung Squamous Cell Carcinoma (CPTAC, Cell 2021) 108 samples","Lung Adenocarcinoma (CPTAC, Cell 2020) 110 samples","Glioblastoma (CPTAC, Cell 2021) 99 samples","Breast Cancer (CPTAC, Cell 2020) 122 samples","Pediatric Brain Cancer (CPTAC/CHOP, Cell 2020) 218 samples","Metastatic Prostate Cancer (Provisional, June 2021) 123 samples","MSK MetTropism (MSK, Cell 2021) 25,775 samples","Cancer Therapy and Clonal Hematopoiesis (MSK, 2021) 657 samples","Added TMB (nonsynonymous) scores for all studies. Example: new TMB field for study gbm_cptac_2021(Details for the calculation can be found HERE)"]},{"i":"nov-12-2021","l":"Nov 12, 2021","p":["Added data consisting of 3,680 samples from 6 studies:","Breast Cancer MAPK (MSKCC, Nat Commun 2021) 145 samples","Colorectal Cancer (MSK, 2020) 64 samples","Breast Cancer (MSK, Clinical Cancer Res 2020) 60 samples","High-Grade Serous Ovarian Cancer (MSK, 2021) 45 samples","Diffuse Glioma (GLASS Consortium, Nature 2019) 444 samples","Pan-cancer analysis of whole genomes (ICGC/TCGA, Nature 2020) 2,922 samples"]},{"i":"nov-32021","l":"Nov 3,2021","p":["New Feature: Add Uniprot topology as a new annotation track on the Mutations Tab of the Results View. Example: EGFR in MSK-IMPACT (2017) cohort"]},{"i":"oct-1-2021","l":"Oct 1, 2021","p":["New Feature: Arm level Copy Number events are now loaded into cBioPortal using the Categorial Generic Assay Data Type. 
They can be found in a tab under the Add Charts Button of the Study View Example: Arm Level Data in TCGA PanCancer Atlas"]},{"i":"sep-22-2021","l":"Sep 22, 2021","p":["Added data consisting of 14,844 samples from 7 studies:","Colorectal Cancer (MSK, Gastroenterology 2020) 471 samples","Metastatic Breast Cancer (MSK, Cancer Discovery 2021) 1,365 samples","Lung Adenocarcinoma (MSKCC, 2021) 186 samples","Race Differences in Prostate Cancer (MSK, 2021) 2,069 samples","Medulloblastoma (DKFZ, Nature 2017) 491 samples","Thoracic Cancer (MSK, 2021) 68 samples","China Pan-cancer (OrigiMed, 2020) 10,194 samples"]},{"i":"sep-21-2021","l":"Sep 21, 2021","p":["Enhancement: Downloading the Lollipop plot on the Mutations Tab of the Results View will now also include the annotation tracks:"]},{"i":"aug-17-2021","l":"Aug 17, 2021","p":["New Feature: The Mutations Tab of the Results View can now show exon numbers as an annotation track Example: MET Exon 14 Mutations in MSK-IMPACT (2017) cohort"]},{"i":"aug-10-2021","l":"Aug 10, 2021","p":["New Feature: Use the filtering capabilities in the Mutations Tab of the Results View to create a custom cohort that one can open directly in the Study View Example: CTNNB1 in MSK-IMPACT (2017) cohort"]},{"i":"jul-27-2021","l":"Jul 27, 2021","p":["New Feature: Add a custom filter to any column of the Mutations Tab in the Results View Example: CTNNB1 in MSK-IMPACT (2017) cohort","New Feature: Show detailed descriptions for each annotation source in the header of the Mutations Table in both the Results View and the Patient View Example link"]},{"i":"jul-6-2021","l":"Jul 6, 2021","p":["New Feature: Add any clinical data as a column on the Mutations Tab in the Results View Example: EGFR in MSK-IMPACT (2017) cohort"]},{"i":"june-23-2021","l":"June 23, 2021","p":["Added data consisting of 1,084 samples from 5 studies:","Intrahepatic Cholangiocarcinoma (MSK, Hepatology 2021) 412 samples","Intrahepatic Cholangiocarcinoma (Mount Sinai 2015) 8
samples","RAD51B Associated Mixed Cancers (Mandelker 2021) 19 samples","Intrahepatic Cholangiocarcinoma (MSK, 2020) 219 samples","Lung Adenocarcinoma (NPJ Precision Oncology, MSK 2021) 426 samples","Added mass-spec proteome data from CPTAC to Breast Invasive Carcinoma (TCGA, PanCancer Atlas), Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas) and Colorectal Adenocarcinoma (TCGA, PanCancer Atlas).","Added mass-spec phosphoproteome site level expression from CPTAC to Breast Invasive Carcinoma (TCGA, PanCancer Atlas) and Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas).","Updated gene tables Updated tables of genes (main and alias), based on HGNC. See details HERE in section Contents of seed database. Scripts/resources/process used to construct new tables are described HERE."]},{"i":"june-1-2021","l":"June 1, 2021","p":["New Feature: In certain studies where we have the data we show read counts for uncalled mutations on the Patient View Example: A patient in the Glioma (MSK, 2019) cohort"]},{"i":"may-10-2021","l":"May 10, 2021","p":["New Feature: Pick color for User Defined Groups Example: Color Bladder Cancer Group in MSK-IMPACT (2017) cohort, implemented by The Hyve."]},{"i":"may-4-2021","l":"May 4, 2021","p":["New Feature: Add more categories of mutations to the Mutations Tab on the Results View, including Driver/VUS, Splice and Structural Variants Example: TP53 alterations in the MSK-IMPACT (2017) cohort"]},{"i":"april-21-2021","l":"April 21, 2021","p":["Added data consisting of 4074 samples from 9 studies:","Metaplastic Breast Cancer (MSK, 2021) 19 samples","Lung Adenocarcinoma (MSKCC, 2020) 604 samples","Cutaneous Squamous Cell Carcinoma (UCSF, 2021) 105 samples","MSK-IMPACT and MSK-ACCESS Mixed Cohort (MSK, 2021) 1446 samples","Melanoma (MSKCC, 2018) 720 samples","Cholangiocarcinoma (ICGC, Cancer Discov 2017) 489 samples","Esophageal/Stomach Cancer (MSK, 2020) 487 samples","Retinoblastoma (MSK, Cancers 2021) 83 samples","Combined Hepatocellular
and Intrahepatic Cholangiocarcinoma (Peking University, Cancer Cell 2019) 121 samples"]},{"i":"april-20-2021","l":"April 20, 2021","p":["New Feature: Add driver annotations to download tab on Results View Example: RAS/RAF alterations in colorectal cancer"]},{"i":"march-30-2021","l":"March 30, 2021","p":["Enhancement: Add 95% Confidence Interval for Survival Plots Example: Altered vs Unaltered EGFR in Lung Cancer"]},{"i":"march-11-2021","l":"March 11, 2021","p":["New Feature: Combine different types of alterations in Comparison View Example: Deletions and Truncating events in primary vs metastases or read more on The Hyve's blog","Enhancement: Improve UI for OncoPrint, aggregating various data modalities in a single add track dropdown button Example: Add clinical, heatmap and treatment response data into the OncoPrint"]},{"i":"february-16-2021","l":"February 16, 2021","p":["Enhancement: Show only TCGA PanCancer Atlas Pathways in Results and Patient View to avoid showing many similar pathways Example: Clinvar APC and CTNNB1 alterations in WNT pathway"]},{"i":"january-28-2021","l":"January 28, 2021","p":["New Feature: Show ClinVar Interpretation in Mutation tables Example: Clinvar Interpretations in BRCA2"]},{"i":"january-12-2021","l":"January 12, 2021","p":["New Feature: Add your own custom data for a sample or patient to use on the study or comparison view Example: Add custom data to three samples and do a comparison","New Feature: Show the mutations of a patient inside a pathway schematic using PathwayMapper Example: Notch signaling pathway in a prostate cancer patient","New Feature: Display and compare generic assays, such as microbiome and treatment response, on the study view Example: Prasinovirus microbiome signatures in TCGA","New Feature: The Plots tab on Results View now allows you to group alterations by Driver and VUS Example: POLE driver mutations vs VUSs against mutation counts in TCGA Colorectal Adenocarcinoma"]},{"i":"december-31-2020","l":"December 
31, 2020","p":["Added data consisting of 430 samples from 5 studies:","Juvenile Papillomatosis and Breast Cancer (MSK, 2020) 5 samples","Mixed cfDNA (MSKCC, 2020) 229 samples","Metastatic Melanoma (DFCI, Nature Medicine 2019) 144 samples","Lung Cancer (SMC, Cancer Research 2016) 22 samples","Upper Tract Urothelial Carcinoma (IGBMC, Genome Biology 2021) 30 samples","Added survival data to Breast Cancer (METABRIC, Nature 2012 & Nat Commun 2016)"]},{"i":"november-3-2020","l":"November 3, 2020","p":["New Feature: The map of local installations of cBioPortal is available now. Please consider registering your instance here. image","Enhancement: upgraded the Genomic Evolution tab in Patient View with timeline Example image"]},{"i":"october-20-2020","l":"October 20, 2020","p":["Enhancement: Expression tab has now been merged into the Plots tab image"]},{"i":"october-16-2020","l":"October 16, 2020","p":["Added data consisting of 25,078 samples from 5 studies:","Melanomas (TCGA, Cell 2015) 359 samples","Retinoblastoma cfDNA (MSKCC 2020) 14 samples","The Angiosarcoma Project (Provisional, July 2020) 83 samples","Bladder Cancer (MSK/TCGA, 2020) 476 samples","Cancer Therapy and Clonal Hematopoiesis (MSK, 2020) 24,146 samples","Added MSI data(MSIsensor from Mariamidze et al. 2018 and MANTIS scores from Roychowdhury et al. 2017) for all 32 TCGA PanCan Atlas Cohorts.","Added new profile“RNA-Seq V2 expression Z-scores relative to normal samples” for 16 TCGA PanCan Atlas Cohorts. The normals samples RNA-Seq V2 expression data were curated from GDC, and can be downloaded from our Datahub or Data Set page. Example: ERBB2 expression z-scores relative to normal expression","image"]},{"i":"october-13-2020","l":"October 13, 2020","p":["Enhancement: Study View now allows comparing samples with mutations or copy number alterations in different genes image","New Feature: When treatment timeline is available (e.g. 
in this study), Study View now allows the selection and comparison of patients treated with specific drugs, or samples sequenced pre or post specific drug treatments image"]},{"i":"september-30-2020","l":"September 30, 2020","p":["New Feature: Microbiome signature data is available for comparison now. Example: comparing colorectal subtypes for enriched microbiome signatures image"]},{"i":"september-22-2020","l":"September 22, 2020","p":["Enhancement: The timeline feature in Patient View has been refactored with an improved UI. Example image","Enhancement: Logrank p-values are now provided for all survival analysis (previously only availalbe when comparing two groups). Example"]},{"i":"august-11-2020","l":"August 11, 2020","p":["New Feature: microbiome data of TCGA samples from Poore et al. 2020 are now available for analysis in the OncoPrint and Plots tabs. Example: Orthohepadnavirus across TCGA cancers image","New Feature: You can now compare DNA Methylation data between groups using the Comparison feature. Example: Comparing DNA methylation levels between samples with high vs low BRCA1 expression image","Added data consisting of 513 samples from 3 studies:","Breast Cancer (SMC 2018) 187 samples","Germ Cell Tumors and Shared Leukemias (MSK 2020) 21 samples","Lung Adenocarcinoma (OncoSG, Nat Genet 2020) 305 samples","Added RPPA data in addition to the microbiome data for 31 TCGA Pancan studies (except LAML)"]},{"i":"july-21-2020","l":"July 21, 2020","p":["New Feature: The Mutations tab now has the option to show mutation effects for different transcripts / isoforms. Note that some annotation features are only available for the canonical isoform. example image","Enhancement: The Plots tab is now supported in multi-study queries. 
example image","New Feature: You can now share custom groups in the Study View example"]},{"i":"june-11-2020","l":"June 11, 2020","p":["Added data consisting of 267 samples from 2 studies:","Gastric Cancer (OncoSG, 2018) 147 samples","120 ctDNA samples added to Non-Small Cell Lung Cancer (TRACERx, NEJM & Nature 2017) 447 samples"]},{"i":"june-9-2020","l":"June 9, 2020","p":["Enhancement: using OQL to query for mutations based on a protein position range. example image","New Feature: you can now send the OncoPrint data to the OncoPrinter tool for customization. image","Enhancement: Mutational spectrum data can be downloaded from OncoPrint image"]},{"i":"june-2-2020","l":"June 2, 2020","p":["Enhancement: Pediatric cancer studies are now grouped and highlighted in the query page image"]},{"i":"may-6-2020","l":"May 6, 2020","p":["Added data consisting of 574 samples from 3 studies:","Uterine Sarcoma/Mesenchymal (MSK, Clin Cancer Res 2020) 108 samples","Metastatic castration-sensitive prostate cancer (MSK, Clin Cancer Res 2020) 424 samples","Glioblastoma (Columbia, Nat Med. 2019) 42 samples","Updated one study:","Expression data was added to The Metastatic Breast Cancer Project (Provisional, February 2020)."]},{"i":"april-24-2020","l":"April 24, 2020","p":["New Feature: Add a new chart on the Study View for selecting samples based on pre-defined case lists:"]},{"i":"april-10-2020","l":"April 10, 2020","p":["New Feature: Make cohorts on the Study View using continuous molecular profiles of one or more gene(s), such as mRNA expression, methylation, RPPA and continuous CNA. example","Combine this with the group comparison feature to compare e.g. all quartiles of expression:","New Feature: Annotate mutations using the Mutation Mapper Tool on the GRCh38 reference genome:","mutation_mapper_tool_grch38"]},{"i":"april-3-2020","l":"April 3, 2020","p":["New Feature: Extended the Comparison tab to support the comparison of altered samples per gene or alteration. 
This example query compares NSCLC patients with 1) both mutated and amplified EGFR, 2) mutated EGFR only, and 3) amplified EGFR only.","image"]},{"i":"march-27-2020","l":"March 27, 2020","p":["Enhancement: User selections in the Plots tab are now saved in the URL. example","New Feature: Added table of data availability per profile in the Study View. example"]},{"i":"march-20-2020","l":"March 20, 2020","p":["Enhancement: Extended Survival Analysis to support more outcome measures. example","image"]},{"i":"march-18-2020","l":"March 18, 2020","p":["Added data consisting of 1,393 samples from 3 studies:","Breast Cancer (Alpelisib plus AI, Nature Cancer 2020) 141 samples","Glioma (MSKCC, Clin Cancer Res 2019) 1,004 samples","Mixed cfDNA (MSK, Nature Medicine 2019) 248 samples"]},{"i":"march-3-2020","l":"March 3, 2020","p":["New Feature: Added Pathways tab to the Results View page, which visualizes the alteration frequencies of genes in pathways of interest. The pathways are pulled from https://www.pathwaymapper.org and shown in a read only view. One can edit these pathways in the PathwayMapper editor. For more information see the tutorial.","pathwaymapper_screenshot"]},{"i":"february-12-2020","l":"February 12, 2020","p":["Added data consisting of 1,605 samples from 3 studies:","Tumors with TRK fusions (MSK, 2019) 106 samples","Lymphoma Cell Lines (MSKCC, 2020) 34 samples","Prostate Adenocarcinoma (MSKCC, 2020) 1,465 samples"]},{"i":"february-6-2020","l":"February 6, 2020","p":["New Feature: Extend the recent group comparison feature by allowing comparisons inside the Results View page. The new tab allows for quick comparison of altered vs unaltered cases by survival, clinical information, mutation, copy number events and mRNA expression:","group_results640px","Performance enhancement: the Study View's mutation table now loads faster for studies with multiple gene panels. 
For the genie portal, which has a study with many different gene panels this resulted in a speed-up from ~ 90-120 seconds to 5 seconds.","Read more about the v3.2.2 release here"]},{"i":"january-30-2020","l":"January 30, 2020","p":["Enhancement: Show HGVSg in mutations table and linkout to Genome Nexus:","hgvsg genome nexus","Enhancement: Add a pencil button near gene list in results page which opens interface for quickly modifying the oql of the query:","edit query pencil","See more updates here"]},{"i":"january-29-2020","l":"January 29, 2020","p":["Added data consisting of 197 samples from 2 studies:","Bladder/Urinary Tract Cancer (MSK, 2019) 78 samples","Upper Tract Urothelial Carcinoma (MSK, 2019) 119 samples"]},{"i":"december-19-2019","l":"December 19, 2019","p":["Enhancement: We restored support for submitting large queries from external applications using HTTP POST requests. Accepted parameters are the same as appear in the url of a query submitted from the homepage.","See more updates here"]},{"i":"december-12-2019","l":"December 12, 2019","p":["Enhancement: Several enhancements to the display of gene panels on the Patient View page, by The Hyve, described in more detail here","image","Enhancement: Add Count Bubbles to Oncoprint Toolbar","Screenshot from 2019-12-06 11-36-21","See more updates here"]},{"i":"november-29-2019","l":"November 29, 2019","p":["Enhancement: Support group comparison for custom charts in Study View page","Enhancement: Performance improvement of Co-Expression analysis.","Enhancement: Kaplan-Meier plots now supports custom time range.","See more updates here"]},{"i":"november-22-2019","l":"November 22, 2019","p":["New Feature: Support for Treatment response data in the Oncoprint and Plots tab, including new Waterfall plot type. Read more in The Hyve's blog post","image"]},{"i":"november-15-2019","l":"November 15, 2019","p":["Enhancement: heatmap tracks in OncoPrint now has separate headers and sub-menus. 
example","image","Enhancement: global settings for query session"]},{"i":"november-7-2019","l":"November 7, 2019","p":["Added data consisting of 212 samples from 3 studies:","Metastatic Melanoma (DFCI, Science 2015) 110 samples","Melanoma (MSKCC, NEJM 2014) 64 samples","Metastatic Melanoma (UCLA, Cell 2016) 38 samples"]},{"i":"october-30-2019","l":"October 30, 2019","p":["Added data consisting of 178 samples from 2 studies:","Intrahepatic Cholangiocarcinoma (Shanghai, Nat Commun 2014) 103 samples","Non-Small Cell Lung Cancer (MSK, Cancer Cell 2018) 75 samples"]},{"i":"october-23-2019","l":"October 23, 2019","p":["Enhancement: Quick example links in Plots tab. example"]},{"i":"october-14-2019","l":"October 14, 2019","p":["New Feature: Fusion Genes table in Study View. example","image"]},{"i":"october-11-2019","l":"October 11, 2019","p":["Enhancement: The Download interface on the homepage has been removed. Enhanced download functionality is now available after querying on the results page.","Home page:","homepage download tab removed","Results page:","results page download tab","Note that as before one can always download the full raw data on the Data Sets page or from Datahub."]},{"i":"october-9-2019","l":"October 9, 2019","p":["Added data consisting of 2725 samples from 4 studies:","Cancer Cell Line Encyclopedia (Broad, 2019) 1739 samples","Chronic Lymphocytic Leukemia (Broad, Nature 2015) 537 samples","Rectal Cancer (MSK,Nature Medicine 2019) 339 samples","Colon Cancer (CPTAC-2 Prospective, Cell 2019) 110 samples","Updated Esophageal Carcinoma (TCGA, Nature 2017) with addition of CNA data for Esophageal Squamous Cell Carcinoma cases 90 samples."]},{"i":"september-18-2019","l":"September 18, 2019","p":["New Feature: The list and order of charts of a study will be automatically saved now as a user preference on the study view page."]},{"i":"september-6-2019","l":"September 6, 2019","p":["Added data consisting of 1216 samples from 3 studies:","Breast Cancer (MSKCC, 
2019) 70 samples","Brain Tumor PDXs (Mayo Clinic, 2019) 97 samples","Adenoid Cystic Carcinoma Project (2019) 1049 samples"]},{"i":"august-13-2019","l":"August 13, 2019","p":["Added data consisting of 295 samples from 3 studies:","Pediatric Preclinical Testing Consortium (PPTC, 2019) 261 samples","Non-small cell lung cancer (MSK, Science 2015) 16 samples","Prostate Cancer (MSK, 2019) 18 samples"]},{"i":"july-26-2019","l":"July 26, 2019","p":["Added data consisting of 35 samples from 1 study:","Added Hypoxia data for:","Brain Lower Grade Glioma (TCGA, PanCancer Atlas)","Breast Invasive Carcinoma (TCGA, PanCancer Atlas)","Cervical Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Clear Cell Renal Cell Carcinoma (DFCI, Science 2019) 35 samples","Colorectal Adenocarcinoma (TCGA, PanCancer Atlas)","Glioblastoma Multiforme (TCGA, PanCancer Atlas)","Head and Neck Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Clear Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Papillary Cell Carcinoma (TCGA, PanCancer Atlas)","Liver Hepatocellular Carcinoma (TCGA, PanCancer Atlas)","Lung Adenocarcinoma (TCGA, PanCancer Atlas)","Lung Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas)","Pancreatic Adenocarcinoma (TCGA, PanCancer Atlas)","Pheochromocytoma and Paraganglioma (TCGA, PanCancer Atlas)","Prostate Adenocarcinoma (TCGA, PanCancer Atlas)","Skin Cutaneous Melanoma (TCGA, PanCancer Atlas)","Thyroid Carcinoma (TCGA, PanCancer Atlas)","Uterine Corpus Endometrial Carcinoma (TCGA, PanCancer Atlas)"]},{"i":"july-24-2019","l":"July 24, 2019","p":["Added data consisting of 151 samples from 1 study:","Myeloproliferative Neoplasms (CIMR, NEJM 2013) 151 samples"]},{"i":"july-13-2019","l":"July 13, 2019","p":["Public Release 6.1 of AACR Project GENIE:","The sixth data set, GENIE 6.0-public, was released in early July 2019. A patch to GENIE 6.0-public, GENIE 6.1-pubic, was subsequently released on July 13, 2019. 
The combined data set now includes nearly 70,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for nearly 80 major cancer types, including data from nearly 11,000 patients with lung cancer, greater than 9,700 patients with breast cancer, and nearly 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-2-2019","l":"July 2, 2019","p":["Added data consistng of 785 samples from 4 studies:","Non-Small Cell Lung Cancer (TRACERx, NEJM 2017) 327 samples","Acute myeloid leukemia or myelodysplastic syndromes (WashU, 2016) 136 samples","Basal Cell Carcinoma (UNIGE, Nat Genet 2016) 293 samples","Colon Adenocarcinoma (CaseCCC, PNAS 2015) 29 samples"]},{"i":"june-19-2019","l":"June 19, 2019","p":["New Feature: Show Genome Aggregation Database (gnomAD) population frequencies in the mutations table - see example:","gnomad feature news"]},{"i":"june-12-2019","l":"June 12, 2019","p":["Added data of 1350 samples from 3 studies:","Pheochromocytoma and Paraganglioma (TCGA, Cell 2017) 178 samples","Metastatic Solid Cancers (UMich, Nature 2017) 500 samples","Acute Myeloid Leukemia (OHSU, Nature 2018) 672 samples","Added survival data for TCGA PanCan Atlas Cohorts (>10,000 samples across 33 tumor types).","Added hypoxia data for Bladder Urothelial Carcinoma (TCGA, PanCancer Atlas)"]},{"i":"june-7-2019","l":"June 7, 2019","p":["New Group Comparison Feature: Compare clinical and genomic features of user-defined groups of samples/patients. 
View Tutorial","group-comparison"]},{"i":"may-8-2019","l":"May 8, 2019","p":["New Feature: Show Post Translational Modification (PTM) information from dbPTM on the Mutation Mapper- see example:","ptm feature_news"]},{"i":"april-26-2019","l":"April 26, 2019","p":["Added data of 568 samples from 4 studies:","Adenoid Cystic Carcinoma (JHU, Cancer Prev Res 2016) 25 samples","Histiocytosis Cobimetinib (MSK, Nature 2019) 52 samples","Upper Tract Urothelial Carcinoma (Cornell/Baylor/MDACC, Nat Comm 2019) 47 samples","Metastatic Prostate Adenocarcinoma (SU2C/PCF Dream Team, PNAS 2019) 444 samples"]},{"i":"march-29-2019","l":"March 29, 2019","p":["New Feature: Use the new quick search tab on the homepage to more easily navigate to a study, gene or patient:","quick_search_news"]},{"i":"march-15-2019","l":"March 15, 2019","p":["Added data of 338 samples from 4 studies:","Adenoid Cystic Carcinoma (MGH, Nat Gen 2016) 10 samples","Gallbladder Cancer (MSK, Cancer 2018) 103 samples","The Metastatic Prostate Cancer Project (Provisional, December 2018) 19 samples","Adult Soft Tissue Sarcomas (TCGA, Cell 2017) 206 samples"]},{"i":"february-22-2019","l":"February 22, 2019","p":["Enhancement: Exon number and HGVSc annotations are available in optional columns in the Mutations tab on the Results page and in the Patient View.","New feature: option to a show regression line in the scatter plot in the Plots tab on the Results page","image"]},{"i":"february-19-2019","l":"February 19, 2019","p":["New feature: Copy-Number Segments tab on the Study View page using igv.js v2- see example","Improved Copy-Number Segments tab on the Results page","New feature: OncoKB and Cancer Hotspots tracks in the Mutations tab on the Results page","image"]},{"i":"january-24-2019","l":"January 24, 2019","p":["Added data of 2328 samples from 8 studies:","Uveal Melanoma (QIMR, Oncotarget 2016) 28 samples","Squamous Cell Carcinoma of the Vulva (CUK, Exp Mol Med 2018) 15 samples","TMB and Immunotherapy (MSKCC, Nat 
Genet 2019) 1661 samples","Glioma (MSK, 2018) 91 samples","Urothelial Carcinoma (Cornell/Trento, Nat Gen 2016) 72 samples","Hepatocellular Carcinoma (MSK, Clin Cancer Res 2018) 127 samples","MSK Thoracic PDX (MSK, Provisional) 139 samples","Cholangiocarcinoma (MSK, Clin Cancer Res 2018) 195 samples","Updated data for The Metastatic Breast Cancer Project (Provisional, October 2018) 237 samples"]},{"i":"january-10-2019","l":"January 10, 2019","p":["cBioPortal now supports queries for driver mutations, fusions and copy number alterations as well as germline/somatic mutations using Onco Query Language (OQL)-- see example","A new tutorial explores OQL and provides examples of how OQL can be a powerful tool to refine queries."]},{"i":"december-17-2018","l":"December 17, 2018","p":["The 10th phase of cBioPortal architectural upgrade is now complete: the Study View has been moved to the new architecture with numerous improvements. This marks the completion of the cBioPortal architectural refactoring! 
\uD83C\uDF89\uD83C\uDF89\uD83C\uDF89","image"]},{"i":"october-29-2018","l":"October 29, 2018","p":["The ninth phase of the cBioPortal architectural upgrade is now complete: the results page is now a single-page application with better performance.","Supported plotting mutations by type in Plots tab","image"]},{"i":"october-19-2018","l":"October 19, 2018","p":["Support selection of transcript of interest in the MutationMapper tool via Genome Nexus.","mutation_mapper_dropdown"]},{"i":"october-17-2018","l":"October 17, 2018","p":["Added data of 3578 samples from 8 studies:","Rhabdoid Cancer (BCGSC, Cancer Cell 2016) 40 samples","Diffuse Large B-Cell Lymphoma (Duke, Cell 2017) 1001 samples","Diffuse Large B cell Lymphoma (DFCI, Nat Med 2018) 135 samples","Breast Fibroepithelial Tumors (Duke-NUS, Nat Genet 2015) 22 samples","Uterine Clear Cell Carcinoma (NIH, Cancer 2017) 16 samples","Endometrial Cancer (MSK, 2018) 197 samples","Breast Cancer (MSK, Cancer Cell 2018) 1918 samples","MSS Mixed Solid Tumors (Van Allen, 2018) 249 samples","Updated data for The Angiosarcoma Project (Provisional, September 2018) 48 samples"]},{"i":"august-20-2018","l":"August 20, 2018","p":["Now you can log in on the public cBioPortal with your Google account and save your virtual studies for quick analysis.","image"]},{"i":"august-7-2018","l":"August 7, 2018","p":["The eighth phase of the cBioPortal architectural upgrade is now complete: The Plots, Expression, Network, and Bookmarks tabs, and therefore all analysis tabs in the results page, have been moved to the new architecture.","Updated the MutationMapper tool, now connecting to Genome Nexus for annotating mutations on the fly.","Total Mutations and Fraction Genome Altered are now available in Plots tab for visualization and analysis.","Enhanced clinical attribute selector for OncoPrint, now showing sample counts per attribute.","image"]},{"i":"july-27-2018","l":"July 27, 2018","p":["Added data of 2787 samples from 10 studies:","Mixed 
Tumors (PIP-Seq 2017) 103 samples","Nonmuscle Invasive Bladder Cancer (MSK Eur Urol 2017) 105 samples","Pediatric Neuroblastoma (TARGET, 2018) 1089 samples","Pediatric Pan-Cancer (DKFZ - German Cancer Consortium, 2017) 961 samples","Skin Cutaneous Melanoma (Broad, Cancer Discov 2014) 78 samples","Cutaneous Squamous Cell Carcinoma (MD Anderson, Clin Cancer Res 2014) 39 samples","Diffuse Large B-cell Lymphoma (BCGSC, Blood 2013) 53 samples","Non-Hodgkin Lymphoma (BCGSC, Nature 2011) 14 samples","Chronic lymphocytic leukemia (ICGA, Nat 2011) 105 samples","Neuroblastoma (Broad Institute 2013) 240 samples"]},{"i":"june-20-2018","l":"June 20, 2018","p":["The seventh phase of the cBioPortal architectural upgrade is now complete: The Enrichments and Co-Expression tabs have been moved to the new architecture.","Supported merged gene tracks in OncoPrint and Onco Query Language-- see example","image"]},{"i":"may-10-2018","l":"May 10, 2018","p":["Enhanced OncoPrint to show germline mutations -- see example","image"]},{"i":"april-17-2018","l":"April 17, 2018","p":["Acute Lymphoblastic Leukemia (St Jude, Nat Genet 2016) 73 samples","Added data of 3416 samples from 10 published studies:","Added data of 3732 samples from 4 TARGET studies:","Bladder Cancer (TCGA, Cell 2017) 413 samples","Colorectal Cancer (MSK, Cancer Cell 2018) 1134 samples","Metastatic Esophagogastric Cancer (MSK,Cancer Discovery 2017) 341 samples","Non-Small Cell Lung Cancer (MSK, JCO 2018) 240 samples","Pediatric Acute Lymphoid Leukemia - Phase II (TARGET, 2018) 1978 samples","Pediatric Acute Myeloid Leukemia (TARGET, 2018) 1025 samples","Pediatric Rhabdoid Tumor (TARGET, 2018) 72 samples","Pediatric Wilms' Tumor (TARGET, 2018) 657 samples","Prostate Adenocarcinoma (EurUrol, 2017) 65 samples","Prostate Adenocarcinoma (MSKCC/DFCI, Nature Genetics 2018) 1013 samples","Small-Cell Lung Cancer (Multi-Institute 2017) 20 samples","The Angiosarcoma Project (Provisional, February 2018) 14 samples","Updated Segment data 
and Allele Frequencies for The Metastatic Breast Cancer Project (Provisional, October 2017) 103 samples"]},{"i":"april-5-2018","l":"April 5, 2018","p":["Acute Myeloid Leukemia (TCGA, PanCancer Atlas)","Added data from the TCGA PanCanAtlas project with >10,000 samples from 33 tumor types:","Adrenocortical Carcinoma (TCGA, PanCancer Atlas)","Bladder Urothelial Carcinoma (TCGA, PanCancer Atlas)","Brain Lower Grade Glioma (TCGA, PanCancer Atlas)","Breast Invasive Carcinoma (TCGA, PanCancer Atlas)","Cervical Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Cholangiocarcinoma (TCGA, PanCancer Atlas)","Colon Adenocarcinoma (TCGA, PanCancer Atlas)","Diffuse Large B-Cell Lymphoma (TCGA, PanCancer Atlas)","Esophageal Adenocarcinoma (TCGA, PanCancer Atlas)","Glioblastoma Multiforme (TCGA, PanCancer Atlas)","Head and Neck Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Chromophobe (TCGA, PanCancer Atlas)","Kidney Renal Clear Cell Carcinoma (TCGA, PanCancer Atlas)","Kidney Renal Papillary Cell Carcinoma (TCGA, PanCancer Atlas)","Liver Hepatocellular Carcinoma (TCGA, PanCancer Atlas)","Lung Adenocarcinoma (TCGA, PanCancer Atlas)","Lung Squamous Cell Carcinoma (TCGA, PanCancer Atlas)","Mesothelioma (TCGA, PanCancer Atlas)","Ovarian Serous Cystadenocarcinoma (TCGA, PanCancer Atlas)","Pancreatic Adenocarcinoma (TCGA, PanCancer Atlas)","Pheochromocytoma and Paraganglioma (TCGA, PanCancer Atlas)","Prostate Adenocarcinoma (TCGA, PanCancer Atlas)","Rectum Adenocarcinoma (TCGA, PanCancer Atlas)","Sarcoma (TCGA, PanCancer Atlas)","Skin Cutaneous Melanoma (TCGA, PanCancer Atlas)","Stomach Adenocarcinoma (TCGA, PanCancer Atlas)","Testicular Germ Cell Tumors (TCGA, PanCancer Atlas)","Thymoma (TCGA, PanCancer Atlas)","Thyroid Carcinoma (TCGA, PanCancer Atlas)","Uterine Carcinosarcoma (TCGA, PanCancer Atlas)","Uterine Corpus Endometrial Carcinoma (TCGA, PanCancer Atlas)","Uveal Melanoma (TCGA, PanCancer Atlas)"]},{"i":"march-20-2018","l":"March 20, 2018","p":["The sixth phase of the 
cBioPortal architectural upgrade is now complete: The Download tab has been moved to the new architecture.","Data can now be downloaded in tabular format from OncoPrint.","Added an option to download an SVG file on the Cancer Type Summary tab."]},{"i":"january-15-2018","l":"January 15, 2018","p":["The fifth phase of the cBioPortal architectural upgrade is now complete: The OncoPrint and Survival tabs have been moved to the new architecture."]},{"i":"november-20-2017","l":"November 20, 2017","p":["You can now combine multiple studies and view them on the study summary page. Example: liver cancer studies","You can now bookmark or share your selected samples as virtual studies with the share icon on the study summary page. Example: a virtual study of breast tumors","Cross-study query reimplemented: Now you can view an OncoPrint of multiple studies. Example: querying NSCLC tumors from 5 studies","image"]},{"i":"october-17-2017","l":"October 17, 2017","p":["The fourth phase of the cBioPortal architectural upgrade is now complete: The Mutual Exclusivity and Cancer Type Summary tabs have been moved to the new architecture.","Updated protein structure alignment data in Mutations tab are now retrieved from Genome Nexus via the G2S web service."]},{"i":"october-2-2017","l":"October 2, 2017","p":["Added data of 1646 samples from 7 published studies:","NGS in Anaplastic Oligodendroglioma and Anaplastic Oligoastrocytomas tumors (MSK, Neuro Oncol 2017) 22 samples","MSK-IMPACT Clinical Sequencing Cohort for Non-Small Cell Cancer (MSK, Cancer Discovery 2017) 915 samples","Paired-exome sequencing of acral melanoma (TGEN, Genome Res 2017) 38 samples","MSK-IMPACT Clinical Sequencing Cohort in Prostate Cancer (MSK, JCO Precision Oncology 2017) 504 samples","Whole-exome sequences (WES) of pretreatment melanoma tumors (UCLA, Cell 2016) 39 samples","Next generation sequencing (NGS) of pre-treatment metastatic melanoma samples (MSK, JCO Precision Oncology 2017) 66 samples","Targeted gene 
sequencing in 62 high-grade primary Unclassified Renal Cell Carcinoma (MSK, Nature 2016) 62 samples","Updated data for MSK-IMPACT Clinical Sequencing Cohort (MSK, Nat Med 2017) with overall survival data."]},{"i":"august-3-2017","l":"August 3, 2017","p":["The third phase of the cBioPortal architectural upgrade is now complete: The Mutations tab now has a fresh look and faster performance -- see example","image","Variant interpretations from the CIViC database are now integrated into the annotation columns on the Mutations tab and in the patient view pages","New summary graph for all cancer studies and samples on the front page"]},{"i":"june-26-2017","l":"June 26, 2017","p":["The second phase of the cBioPortal architectural upgrade is now complete: The query interface now has a fresh look and faster performance.","image"]},{"i":"may-12-2017","l":"May 12, 2017","p":["Added data of 12,211 samples from 11 published studies:","MSK-IMPACT Clinical Sequencing Cohort (MSK, Nat Med 2017) 10,945 samples","Whole-genome sequencing of pilocytic astrocytomasatic (DKFZ, Nat Genetics, 2013) 96 samples","Hepatocellular Carcinomas (INSERM, Nat Genet 2015) 243 samples","Cystic Tumor of the Pancreas (Johns Hopkins, PNAS 2011) 32 samples","Whole-Genome Sequencing of Pancreatic Neuroendocrine Tumors (ARC- Net, Nature, 2017) 98 samples","Medulloblastoma (Sickkids, Nature 2016) 46 samples","Genetic Characterization of NSCLC young adult patients (University of Turin, Lung Cancer 2016) 41 samples","Genomic Profile of Patients with Advanced Germ Cell Tumors (MSK, JCO 2016). 180 samples","Ampullary Carcinoma (Baylor, Cell Reports 2016) 160 samples","Mutational profiles of metastatic breast cancer (INSERM, 2016) 216 samples","Prostate Adenocarcinoma (Fred Hutchinson CRC, Nat Med 2016) 154 samples"]},{"i":"may-5-2017","l":"May 5, 2017","p":["First phase of cBioPortal architectural upgrade complete: Patient view now has fresh look and faster performance. 
example"]},{"i":"march-28-2017","l":"March 28, 2017","p":["New features:","Per-sample mutation spectra are now available in OncoPrints -- see example","image","mRNA heat map clustering is now supported in OncoPrints","MDACC Next-Generation Clustered Heat Maps are now available in the patient view","cBioPortal web site style change"]},{"i":"feburary-2-2017","l":"Feburary 2, 2017","p":["New features:","3D hotspot mutation annotations are now available from 3dhotspots.org","New data:","CPTAC proteomics data have been integrated for TCGA breast, ovarian, and colorectal provisional studies"]},{"i":"december-23-2016","l":"December 23, 2016","p":["New features:","Heat map visualization of gene expression data in the OncoPrint","OncoPrint Heatmap","Heat map visualization of gene expression data in the Study View page connecting to MDACC's TCGA Next-Generation Clustered Heat Map Compendium"]},{"i":"october-7-2016","l":"October 7, 2016","p":["New features:","All data sets can now be downloaded as flat files from the new Data Hub","Annotation of putative driver missense mutations in OncoPrints, based on OncoKB, mutation hotspots, and recurrence in cBioPortal and COSMIC","OncoPrint-OncoKB","Copy number segments visualization directly in the browser in a new CN Segments tab via IGV.js","image","Improvements:","Improved cancer study view page (bug fixes and increased performance)"]},{"i":"july-24-2016","l":"July 24, 2016","p":["Added data of 4,375 samples from 21 published studies:","Adenoid Cystic Carcinoma (FMI, Am J Surg Pathl. 2014) 28 samples","Adenoid Cystic Carcinoma (MDA, Clin Cancer Res 2015) 102 samples","Adenoid Cystic Carcinoma (Sanger/MDA, JCI 2013) 24 samples","Adenoid Cystic Carcinoma of the Breast (MSKCC, J Pathol. 
2015) 12 samples","Bladder Cancer, Plasmacytoid Variant (MSKCC, Nat Genet 2016) 34 samples","Breast Cancer (METABRIC, Nat Commun 2016) 1980 samples","Chronic Lymphocytic Leukemia (Broad, Cell 2013) 160 samples","Chronic Lymphocytic Leukemia (IUOPA, Nature 2015) 506 samples","Colorectal Adenocarcinoma (DFCI, Cell Reports 2016) 619 samples","Cutaneous T Cell Lymphoma (Columbia U, Nat Genet 2015) 42 samples","Diffuse Large B-Cell Lymphoma (Broad, PNAS 2012) 58 samples","Hepatocellular Adenoma (Inserm, Cancer Cell 2014) 46 samples","Hypodiploid Acute Lymphoid Leukemia (St Jude, Nat Genet 2013) 44 samples","Insulinoma (Shanghai, Nat Commun 2013) 10 samples","Malignant Pleural Mesothelioma (NYU, Cancer Res 2015) 22 samples","Mantle Cell Lymphoma (IDIBIPS, PNAS 2013) 29 samples","Myelodysplasia (Tokyo, Nature 2011) 29 samples","Neuroblastoma (Broad, Nat Genet 2013) 56 samples","New TCGA study:","OncoTree codes assigned per sample","Oral Squamous Cell Carcinoma (MD Anderson, Cancer Discov 2013) 40 samples","Pan-Lung Cancer (TCGA, Nat Genet 2016) 1144 samples","Pancreatic Adenocarcinoma (QCMG, Nature 2016) 383 samples","Recurrent and Metastatic Head & Neck Cancer (JAMA Oncology, 2016) 151 samples","RPPA data updated with the latest data from MD Anderson","Updated TCGA provisional studies","updated to the Firehose run of January 28, 2016"]},{"i":"june-6-2016","l":"June 6, 2016","p":["New features:","Annotation of mutation effect and drug sensitivity on the Mutations tab and the patient view pages (via OncoKB) oncokb-screenshot","Improvements:","Improved OncoPrint visualization using WebGL: faster, more zooming flexibility, visualization of recurrent variants","Improved Network tab with SBGN view for a single interaction","Performance improvement of tables in the study view page","Mutation type summary on the Mutations tab"]},{"i":"march-31-2016","l":"March 31, 2016","p":["New features:","Visualization of \"Enrichments Analysis\" results via volcano plots","Improved 
performance of the cross cancer expression view by switching to Plot.ly graphs","Improvements to the \"Clinical Data\" tab on the study view page","More customization options for the cross-cancer histograms","Performance improvements in the study view and query result tabs","Added data of 1235 samples from 3 published studies:","Merged Cohort of LGG and GBM (TCGA, 2016)","Lung Adenocarcinoma (MSKCC, 2015)","Poorly-Differentiated and Anaplastic Thyroid Cancers (MSKCC, JCI 2016)"]},{"i":"january-12-2016","l":"January 12, 2016","p":["Acinar Cell Carcinoma of the Pancreas (Johns Hopkins, J Pathol 2014)","Added data of 650 samples from 10 published studies:","All mutation data mapped to UniProt canonical isoforms","All TCGA data updated to the latest Firehose run of August 21, 2015","Bladder Urothelial Carcinoma (Dana Farber & MSKCC, Cancer Discovery 2014)","Cholangiocarcinoma (TCGA, Provisional)","Clear Cell Renal Cell Carcinoma (U Tokyo, Nat Genet 2013)","Desmoplastic Melanoma (Broad Institute, Nat Genet 2015)","Esophageal Squamous Cell Carcinoma (UCLA, Nat Genet 2014)","Gastric Adenocarcinoma (TMUCIH, PNAS 2015)","Low-Grade Gliomas (UCSF, Science 2014)","Mesothelioma (TCGA, Provisional)","Multiregion Sequencing of Clear Cell Renal Cell Carcinoma (IRC, Nat Genet 2014)","Neuroblastoma (AMC Amsterdam, Nature 2012)","New features:","New TCGA studies:","Primary Central Nervous System Lymphoma (Mayo Clinic, Clin Cancer Res 2015)","Testicular Germ Cell Cancer (TCGA, Provisional)","Thymoma (TCGA, Provisional)","Visualization of multiple samples in a patient","Visualization of timeline data of a patient ( example) timeline-example"]},{"i":"december-23-2015","l":"December 23, 2015","p":["New features:","Visualization of RNA-seq expression levels across TCGA studies (cross-cancer queries) cross cancer expression","Selection of genes in the study view to initiate queries query gene in study view","Improvement:","3-D structures in the \"Mutations\" tab are now rendered by 
3Dmol.js (previously JSmol)","Improved performance by code optimization and compressing large data by gzip"]},{"i":"december-1-2015","l":"December 1, 2015","p":["New feature: Annotated statistically recurrent hotspots, via new algorithm by Chang et al. 2015 Annotate recurrent hotspots"]},{"i":"november-9-2015","l":"November 9, 2015","p":["New features:","Links to MyCancerGenome.org for mutations Link to MyCancerGenome.org","Improved display of selection samples on the study view page","Improvements:","\"Enrichments\" analysis is now run across all genes","The \"Network\" tab is now using Cytoscape.js (Adobe Flash is no longer required)"]},{"i":"october-6-2015","l":"October 6, 2015","p":["Added data of 763 samples from 12 published studies:","Breast Invasive Carcinoma (TCGA, Cell 2015)","Cutaneous squamous cell carcinoma (DFCI, Clin Cancer Res 2015)","Ewing Sarcoma (Institut Cuire, Cancer Discov 2014)","Gallbladder Carcinoma (Shanghai, Nat Genet 2014)","Infant MLL-Rearranged Acute Lymphoblastic Leukemia (St Jude, Nat Genet 2015)","Microdissected Pancreatic Cancer Whole Exome Sequencing (UTSW, Nat Commun 2015)","New TCGA data:","Pancreatic Neuroendocrine Tumors (JHU, Science 2011)","Pediatric Ewing Sarcoma (DFCI, Cancer Discov 2014)","Prostate Adenocarcinoma (TCGA, in press)","Renal Non-Clear Cell Carcinoma (Genentech, Nat Genet 2014)","Rhabdomyosarcoma (NIH, Cancer Discov 2014)","Small Cell Lung Cancer (U Cologne, Nature 2015)","Thymic epithelial tumors (NCI, Nat Genet 2014)","Uterine Carcinosarcoma (JHU, Nat Commun 2014)","Uveal Melanoma (TCGA, Provisional)"]},{"i":"august-21-2015","l":"August 21, 2015","p":["All TCGA data updated to the Firehose run of April 16, 2015.","New feature: Enrichments Analysis finds alterations that are enriched in either altered or unaltered samples.","Improvement: improved OncoPrint with better performance."]},{"i":"june-3-2015","l":"June 3, 2015","p":["Improvements:","Allowed downloading data in each chart/table in study summary 
page.","Added log-rank test p-values to the survival plots in study summary page.","Improved visualization of patient clinical data in patient-centric view.","Added option to merge multiple samples for the same patient in OncoPrint."]},{"i":"april-28-2015","l":"April 28, 2015","p":["New features:","Redesigned query interface to allow selecting multiple cancer studies","Redesigned Plots tab"]},{"i":"january-20-2015","l":"January 20, 2015","p":["All TCGA data updated to the Firehose run of October 17, 2014","COSMIC data updated to V71","New features:","Query page: better search functions to find cancer studies","OncoPrints now support color coding of different mutation types","OncoPrints now support multiple clinical annotation tracks","OncoPrinter tool now supports mRNA expression changes Oncoprint with multiple clinical tracks"]},{"i":"january-6-2015","l":"January 6, 2015","p":["New feature: You can now view frequencies of mutations and copy-number alterations in the study view. These tables are updated dynamically when selecting subsets of samples. 
Alterations in heavily copy-number altered endometrial cancer cases"]},{"i":"december-9-2014","l":"December 9, 2014","p":["New TCGA data:","Added complete and up-to-date clinical data for all TCGA provisional studies","All TCGA data updated to the Firehose run of July 15, 2014","New TCGA provisional studies: Esophageal cancer, Pheochromocytoma and Paraganglioma (PCPG)","New published TCGA studies: Thyroid Cancer and Kidney Chromophobe","Added data of 172 samples from 4 published studies:","Cholangiocarcinoma (National University of Singapore, Nature Genetics 2012)","Cholangiocarcinoma (National Cancer Centre of Singapore, Nature Genetics 2013)","Intrahepatic Cholangiocarcinoma (Johns Hopkins University, Nature Genetics 2013)","Bladder Cancer (MSKCC, Eur Urol 2014)","New features:","Redesigned Mutual Exclusivity tab","Added correlation scores for scatter plots on the Plots tab","Download links to GenomeSpace"]},{"i":"october-24-2014","l":"October 24, 2014","p":["Added data of 885 samples from 11 published studies:","Colorectal Adenocarcinoma Triplets (MSKCC, Genome Biology 2014)","Esophageal Squamous Cell Carcinoma (ICGC, Nature 2014)","Malignant Peripheral Nerve Sheath Tumor (MSKCC, Nature Genetics 2014)","Melanoma (Broad/Dana Farber, Nature 2012)","Nasopharyngeal Carcinoma (National University Singapore, Nature Genetics 2014)","Prostate Adenocarcinoma CNA study (MSKCC, PNAS 2014)","Prostate Adenocarcinoma Organoids (MSKCC, Cell 2014)","Stomach Adenocarcinoma (TCGA, Nature 2014)","Stomach Adenocarcinoma (Pfizer and University of Hong Kong, Nature Genetics 2014)","Stomach Adenocarcinoma (University of Hong Kong, Nature Genetics 2011)","Stomach Adenocarcinoma (University of Tokyo, Nature Genetics 2014)"]},{"i":"august-8-2014","l":"August 8, 2014","p":["Released two new tools","Oncoprinter lets you create Oncoprints from your own, custom data","MutationMapper draws mutation diagrams (lollipop plots) from your custom data"]},{"i":"may-21-2014","l":"May 21, 
2014","p":["All TCGA data updated to the Firehose run of April 16, 2014"]},{"i":"may-12-2014","l":"May 12, 2014","p":["Improved study summary page including survival analysis based on clinical attributes e.g. TCGA Endometrial Cancer cohort Study view"]},{"i":"march-27-2014","l":"March 27, 2014","p":["New features:","Visualizing of mutations mapped on 3D structures (individual or multiple mutations, directly in the browser)","Gene expression correlation analysis (find all genes with expression correlation to your query genes)","The Patient-Centric View now displays mutation frequencies across all cohorts in cBioPortal for each mutation","The Mutation Details Tab and the Patient-Centric View now display the copy-number status of each mutation 3D viewer & Co-expression"]},{"i":"march-18-2014","l":"March 18, 2014","p":["Added mutation data of 898 samples from 11 published studies:","Added two new provisional TCGA studies:","Adrenocortical Carcinoma","All TCGA data updated to the Firehose run of January 15, 2014","Hepatocellular Carcinoma (AMC, Hepatology in press)","Hepatocellular Carcinoma (RIKEN, Nature Genetics 2012)","Medulloblastoma (Broad, Nature 2012)","Medulloblastoma (ICGC, Nature 2012)","Medulloblastoma (PCGP, Nature 2012)","Multiple Myeloma (Broad, Cancer Cell 2014)","NCI-60 Cell Lines (NCI, Cancer Res. 
2012)","Pancreatic Adenocarcinoma (ICGC, Nature 2012)","Small Cell Carcinoma of the Ovary (MSKCC, Nature Genetics in press)","Small Cell Lung Cancer (CLCGP, Nature Genetics 2012)","Small Cell Lung Cancer (Johns Hopkins, Nature Genetics 2012)","Updated to the latest COSMIC data (v68)","Uterine Carcinosarcoma"]},{"i":"december-9-2013","l":"December 9, 2013","p":["Added mutation data of 99 bladder cancer samples (BGI, Nature Genetics 2013)"]},{"i":"december-6-2013","l":"December 6, 2013","p":["Data sets matching four recently submitted or published TCGA studies are now available","Glioblastoma (Cell 2013)","Bladder carcinoma (Nature, in press)","Head & neck squamous cell carcinoma (submitted)","Lung adenocarcinoma (submitted)"]},{"i":"november-8-2013","l":"November 8, 2013","p":["All TCGA data updated to the Firehose run of September 23, 2013.","Updated to the latest COSMIC data (v67).","Added mutation data of 792 samples from 9 published cancer studies:","Esophageal Adenocarcinoma (Broad, Nature Genetics 2013)","Head and Neck Squamous Cell Carcinoma (Broad, Science 2011)","Head and Neck Squamous Cell Carcinoma (Johns Hopkins, Science 2011)","Kidney Renal Clear Cell Carcinoma (BGI, Nature Genetics 2012)","Prostate Adenocarcinoma, Metastatic (Michigan, Nature 2012)","Prostate Adenocarcinoma (Broad/Cornell, Nature Genetics 2012)","Prostate Adenocarcinoma (Broad/Cornell, Cell 2013)","Skin Cutaneous Melanoma (Yale, Nature Genetics 2012)","Skin Cutaneous Melanoma (Broad, Cell 2012)"]},{"i":"october-21-2013","l":"October 21, 2013","p":["Improved interface for survival plots, including information on individual samples via mouse-over","New fusion glyph in OncoPrints FGFR3 fusions in head and neck carcinoma","Improved cross-cancer query: new alteration frequency histogram (example below - query gene: CDKN2A) and mutation diagram Cross Cancer Query"]},{"i":"september-9-2013","l":"September 9, 2013","p":["Updated COSMIC data (v66 Release)","Improved / interactive visualization 
on the \"Protein changes\" tab","Enhanced mutation diagrams: color-coding by mutation time and syncing with table filters","Addition of DNA cytoband information in the patient view of copy-number changes","OncoPrints now allow the display of an optional track with clinical annotation (Endometrial cancer example below) Oncoprint with clinical track"]},{"i":"july-25-2013","l":"July 25, 2013","p":["Multi-gene correlation plots.","Variant allele frequency distribution plots for individual tumor samples.","Tissue images for TCGA samples in the patient view, via Digital Slide Archive. Example."]},{"i":"july-16-2013","l":"July 16, 2013","p":["All TCGA data updated to the May Firehose run (May 23, 2013).","TCGA Pancreatic Cancer study (provisional) added."]},{"i":"july-4-2013","l":"July 4, 2013","p":["Improved rendering of mutation diagrams, including ability to download in PDF format.","Improved home page: Searchable cancer study & gene set selectors, data sets selector."]},{"i":"june-17-2013","l":"June 17, 2013","p":["Improved interface for correlation plots, including information on individual samples via mouse-over.","Gene Details from Biogene are now available in the Network view.","Added mutation and copy number data from a new adenoid cystic carcinoma study: Ho et al., Nature Genetics 2013.","Added mutation data from 6 cancer studies.","Breast Invasive Carcinoma (Shah et al., Nature 2012)","Breast Invasive Carcinoma (Banerji et al., Nature 2012)","Breast Invasive Carcinoma (Stephens et al., Nature 2012)","Lung Adenocarcinoma (Imielinksi et al., Cell 2012)","Lung Adenocarcinoma (Ding et al., Nature 2008)","Colorectal Cancer (Seshagiri et al., Nature 2012)"]},{"i":"june-4-2013","l":"June 4, 2013","p":["All TCGA data updated to the April Firehose run (April 21, 2012)."]},{"i":"may-14-2013","l":"May 14, 2013","p":["Added a published TCGA study: Acute Myeloid Leukemia (TCGA, NEJM 2013)."]},{"i":"april-28-2013","l":"April 28, 2013","p":["All TCGA data updated to the March 
Firehose run (March 26, 2012).","mRNA percentiles for altered genes shown in patient view."]},{"i":"april-2-2013","l":"April 2, 2013","p":["All TCGA data updated to the February Firehose run (February 22, 2012)."]},{"i":"march-28-2013","l":"March 28, 2013","p":["All TCGA data updated to the January Firehose run (January 16, 2012).","Data from a new bladder cancer study from MSKCC has been added (97 samples, Iyer et al., JCO in press)."]},{"i":"february-16-2013","l":"February 16, 2013","p":["The cBio Portal now contains mutation data from all provisional TCGA projects. Please adhere to the TCGA publication guidelines when using these and any TCGA data in your publications.","All data updated to the October Firehose run (October 24, 2012).","Sequencing read counts and frequencies are now shown in the Mutation Details table when available.","Improved OncoPrints, resulting in performance improvements."]},{"i":"november-21-2012","l":"November 21, 2012","p":["Major new feature: Users can now visualize genomic alterations and clinical data of individual tumors, including:","Summary of mutations and copy-number alterations of interest","Clinical trial information","TCGA Pathology Reports","New cancer summary view(Example Endometrial Cancer)","Updated drug data from KEGG DRUG and NCI Cancer Drugs (aggregated by PiHelper)"]},{"i":"october-22-2012","l":"October 22, 2012","p":["All data updated to the Broad Firehose run from July 25, 2012.","COSMIC data added to Mutation Details (via Oncotator).","All predicted functional impact scores are updated to Mutation Assessor 2.0.","Users can now base queries on genes in recurrent regions of copy-number alteration (from GISTIC via Firehose).","The Onco Query Language (OQL) now supports queries for specific mutations or mutation types.","Data sets added that match the data of all TCGA publications (GBM, ovarian, colorectal, and lung squamous)."]},{"i":"july-18-2012","l":"July 18, 2012","p":["Mutation data for the TCGA lung squamous 
cell carcinoma and breast cancer projects (manuscripts in press at Nature).","All data updated to the latest Broad Firehose run(May 25, 2012).","Drug information added to the network view (via Drugbank).","Improved cross-cancer queries: Option to select data types, export of summary graphs.","Users can now base queries on frequently mutated genes (from MutSig via Firehose)."]},{"i":"may-16-2012","l":"May 16, 2012","p":["All data updated to the latest Broad Firehose run(March 21, 2012).","Extended cross-cancer functionality, enabling users to query across all cancer studies in our database.","New \"build a case\" functionality, enabling users to generate custom case sets, based on one or more clinical attributes.","New OncoPrint features, including more compact OncoPrints, and support for RPPA visualization."]},{"i":"february-27-2012","l":"February 27, 2012","p":["All data updated to the latest Broad Firehose run(January 24, 2012).","Validated mutation data for colorectal cancer.","New feature: Mutation Diagrams that show mutations in the context of protein domains. TP53 Mutations in Ovarian Cancer"]},{"i":"january-30-2012","l":"January 30, 2012","p":["Updated data for several TCGA cancer studies.","Some small bug-fixes."]},{"i":"december-22-2011","l":"December 22, 2011","p":["Fourteen new TCGA cancer studies: This includes complete data for TCGA Colorectal Carcinoma and provisional data for thirteen other cancer types in the TCGA production pipeline. Please note that data from these thirteen new cancer types are provisional, not final and do not yet include mutation data. As per NCI guidelines, preliminary mutation data cannot be redistributed until they have been validated. 
TCGA","Four new data types:","Reverse-phase protein array (RPPA) data.","microRNA expression and copy-number (including support for multiple loci)","RNA-Seq based expression data.","log2 copy-number data.","Updated TCGA GBM copy-number, expression, and methylation data.","New gene symbol validation service. You can now use gene aliases and/or Entrez Gene IDs within your gene sets.","Links to IGV for visualization of DNA copy-number changes.","Background information from the Sanger Cancer Gene Census.","Two new Tutorials to get you quickly started in using the portal."]},{"i":"november-14-2011","l":"November 14, 2011","p":["New and improved mutation details, with sorting and filtering capabilities.","In collaboration with Bilkent University, we have added a new Network tab to our results pages. The network tab enables users to visualize, analyze and filter cancer genomic data in the context of pathways and interaction networks derived from Pathway Commons. GBM Network"]},{"i":"september-3-2011","l":"September 3, 2011","p":["You can now query across different cancer studies (feature available directly from the home page).","Our MATLAB CGDS Cancer Genomics Toolbox is now available. The toolbox enables you to download data from the cBio Portal, and import it directly into MATLAB.","The code for the cBio Portal has now been fully open sourced, and made available at Google Code. If you would like to join our open source efforts and make the portal even better, drop us an email."]},{"i":"march-2-2011","l":"March 2, 2011","p":["New plotting features and other improvements:","Correlation plots that show the relationship between different data types for individual genes.","Survival analysis - assess survival differences between altered and non-altered patient sets.","Updated R Package with support for correlation plots and general improvements for retrieving and accessing data in R data frames.","The Web Interface now supports basic clinical data, e.g. 
survival data.","Networks for pathway analysis are now available for download. Survival Analysis"]},{"i":"december-15-2010","l":"December 15, 2010","p":["Several new features, including:","Redesigned and streamlined user interface, based on user feedback and usability testing.","Advanced support for gene-specific alterations. For example, users can now view mutations within TP53, and ignore copy number alterations, or only view amplifications of EGFR, and ignore deletions.","Improved performance.","Frequently Asked Questions document released.","Updated Video Tutorial(update: old link no longer functional. Now see: YouTube"]},{"i":"november-4-2010","l":"November 4, 2010","p":["Enhanced Oncoprints, enabling users to quickly visualize genomic alterations across many cases. Oncoprints now also work in all major browsers, including Firefox, Chrome, Safari, and Internet Explorer.","Official release of our Web Interface, enabling programmatic access to all data.","Official release of our R Package, enabling programmatic access to all data from the R platform for statistical computing. OncoPrints"]}],[{"l":"Genie News"},{"i":"november-9-2023","l":"November 9, 2023","p":["Public Release 14.1 of AACR Project GENIE:","The public release 14.1 version of AACR GENIE has 10 samples retracted that were present in AACR GENIE 14.0-public.","More detailed information can be found in the AACR GENIE release notes and the data releases page from Sage Bionetworks"]},{"i":"september-20-2023","l":"September 20, 2023","p":["Public Release 14.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. 
The current release, GENIE 14.0-public now contains 183,000 sequenced samples from nearly 160,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"may-1-2023","l":"May 1, 2023","p":["Public Release 13.1 of AACR Project GENIE:","The public release 13.1 version of AACR GENIE has 65 samples retracted that were present in AACR GENIE 13.0-public.","More detailed information can be found in the AACR GENIE release notes and the data releases page from Sage Bionetworks"]},{"i":"january-9-2023","l":"January 9, 2023","p":["Public Release 13.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 13.0-public now contains more than 167,000 sequenced samples from over 148,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access."]},{"i":"november-3-2022","l":"November 3, 2022","p":["Public Release BPC CRC 2.0-PUBLIC","The GENIE BPC CRC v2.0-public dataset contains 1,485 CRC patients from three institutions: MSKCC, DFCI, and VICC.","The complete, post-processed data are available on Synapse"]},{"i":"july-6-2022","l":"July 6, 2022","p":["Public Release GENIE ERBB2 Cohort","The study contains 315 samples from 135 patients from 6 institues."]},{"i":"july-22-2022","l":"July 22, 2022","p":["Public Release 12.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 12.0-public, was released in July 2022.The registry now contains more than 154,000 sequenced samples from 137,000+ patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"may-19-2022","l":"May 19, 2022","p":["Public Release BPC NSCLC 2.0-PUBLIC","The GENIE BPC NSCLC v2.0-public dataset contains 1,846 NSCLC patients from 4 institutions: MSKCC, DFCI, VICC and UHN.","The complete, post-processed data are available on Synapse"]},{"i":"january-7-2022","l":"January 7, 2022","p":["Public Release 11.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The current release, GENIE 11.0-public, was released in January 2022. 
The registry now contains over 136,000 sequenced samples from over 121,000 patients, making the AACR Project GENIE registry among the largest fully public cancer genomic data sets released to date.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"june-22-2021","l":"June 22, 2021","p":["Public Release 10.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The tenth data set, GENIE 10.0-public, was released in June 2021. With the most recent data release, the registry now contains genomic information from 120953 samples.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access."]},{"i":"february-8-2021","l":"February 8, 2021","p":["Public Release 9.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The ninth data set, GENIE 9.0-public, was released in February 2021. With the most recent data release, the registry now contains genomic information from more nearly 17,000 non-small cell lung carcinomas, and nearly 12,000 breast and more than 11,000 colorectal cancers.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access."]},{"i":"july-7-2020","l":"July 7, 2020","p":["Public Release 8.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The eighth data set, GENIE 8.0-public, was released in July 2020. The combined data set now includes nearly 96,000 de-identified genomic records collected from patients who were treated at each of the consortium's 19 participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for over 80 major cancer types, including data from greater than 14,000 patients with lung cancer, nearly 12,000 patients with breast cancer, and nearly 9,500 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-29-2020","l":"January 29, 2020","p":["Public Release 7.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The seventh data set, GENIE 7.0-public, was released in January 2020. The combined data set now includes nearly 79720 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. 
The combined data set now includes data for over 80 major cancer types.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-13-2019","l":"July 13, 2019","p":["Public Release 6.1 of AACR Project GENIE:","The sixth data set, GENIE 6.0-public, was released in early July 2019. A patch to GENIE 6.0-public, GENIE 6.1-pubic, was subsequently released on July 13, 2019. The combined data set now includes nearly 70,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for nearly 80 major cancer types, including data from nearly 11,000 patients with lung cancer, greater than 9,700 patients with breast cancer, and nearly 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-08-2019","l":"July 08, 2019","p":["Public Release 6.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The sixth data set, GENIE 6.0-public, was released in July 2019 adding more than 11,000 records to the database. 
The combined data set now includes nearly 71,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. The combined data set now includes data for over 80 major cancer types, including data from greater than 11,000 patients with lung cancer, nearly 9,800 patients with breast cancer, and more than 7,000 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-11-2019","l":"January 11, 2019","p":["Public Release 5.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The forth data set, GENIE 4.0-public, was released in July 2018 adding more than 7,800 records to the database. The combined data set now includes more than 59,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. This data will be released to the public every six months.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"july-16-2018","l":"July 16, 2018","p":["Public Release 4.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The forth data set, GENIE 4.0-public, was released in July 2018 adding more than 7,800 records to the database. The combined data set now includes more than 47,000 de-identified genomic records collected from patients who were treated at each of the consortium's participating institutions, making it among the largest fully public cancer genomic data sets released to date. This data will be released to the public every six months. The public release of the fifth data set, GENIE 5.0-public, will take place in January, 2019.","The combined data set now includes data for over 80 major cancer types, including data from greater than 7,500 patients with lung cancer, nearly 5,500 patients with breast cancer, and more than 5,100 patients with colorectal cancer.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"l":"January 2018","p":["Added 7500 samples to the GENIE Public Cohort The combined dataset now includes samples from over 60 major cancer types including:","6,000 lung cancer samples.","4,500 breast cancer samples.","4,300 colorectal cancer samples.","More detailed information can be found in the AACR GENIE Data Guide. In addition to accessing the data via the cBioPortal, users can download the data directly from Sage Bionetworks. 
For frequently asked questions, visit the AACR FAQ page."]},{"i":"november-20-2017","l":"November 20, 2017","p":["Public Release 2.0 of AACR Project GENIE:","The first set of cancer genomic data aggregated through AACR Project Genomics Evidence Neoplasia Information Exchange (GENIE) was released to the global community in January 2017. The second dataset was released in November 2017, adding more than 13,000 records to the database. The combined data set now includes over 32,000 de-identified genomic records collected from patients who were treated at each of the consortium’s participating institutions, making it among the largest fully public cancer genomic data sets released to date. These data will be continuously updated on a quarterly basis.","The combined data set now includes data for 59 major cancer types, including data on nearly 5,000 patients with lung cancer, nearly 4,000 patients with breast cancer, and more than 3,500 patients with colorectal cancer. For more details about the data, and how to use it, consult the data guide.","In addition to accessing the data on the AACR Project GENIE cBioPortal website, users can download the data directly from Sage Bionetworks. 
Users will need to create an account for either site and agree to the terms of access.","For frequently asked questions, visit the AACR FAQ page."]},{"i":"january-5-2017","l":"January 5, 2017","p":["Initial public release of AACR Project GENIE:","Somatic alterations in 18,980 tumor samples from 18,500 patients sequenced at eight different institutions.","Data is available for download from Sage Bionetworks."]},{"l":"AACR Project GENIE cBioPortal Terms of Use","p":["I will not attempt to identify or contact individual participants from whom these data were collected by any means.","I will not redistribute the data without express written permission from the AACR Project GENIE Coordinating Center (info at aacrgenie dot org).","When publishing or presenting work using or referencing the AACR Project GENIE dataset please include the following attributions:","Please cite: AACR Project GENIE Consortium. AACR Project GENIE: Powering Precision Medicine Through an International Consortium, Cancer Discov. 2017 Aug;7(8):818-831 and include the version of the dataset used.","Include the following acknowledgement: The authors would like to acknowledge the American Association for Cancer Research and its financial and material support in the development of the AACR Project GENIE registry, as well as members of the consortium for their commitment to data sharing. Interpretations are the responsibility of study authors.","Should you decide at any point in the future to stop using the AACR Project GENIE cBioPortal, send request: genie-cbioportal-access at cbio dot mskcc dot org and we will remove your user id and any other information provided during new user registration from our systems.","For more information see terms of access on the AACR website."]}],[{"l":"API and API Clients","p":["cBioPortal provides a REST API for programmatic access to the data. The visualizations one can see on the website leverage the same API. 
By connecting to the API directly, anyone can build their own visualizations/reports.","Please see the full reference documentation for the API here."]},{"l":"API Clients","p":["The cBioPortal REST API is described using Swagger/OpenAPI, which allows one to generate a client in most programming languages. One can use the command line tool curl for downloading data on the command line or use another language such as Python or R to make visualizations. We list some common examples below, but if your language is not listed, there is likely a client generator available elsewhere (see e.g. https://swagger.io/tools/swagger-codegen/). Do reach out if you'd like us to add a language."]},{"l":"R clients","p":["There are multiple ways to access the API using R. Below are two recommended R packages to access cBioPortal data."]},{"i":"cbioportaldata-recommended","l":"cBioPortalData (recommended)","p":["cBioPortalData aims to import all cBioPortal datasets as MultiAssayExperiment objects in Bioconductor. 
Some of its key features:","The MultiAssayExperiment class explicitly links all assays to the patient clinical/pathological data","The MultiAssayExperiment class provides a flexible API including harmonized subsetting and reshaping to convenient wide and long formats.","It provides complete datasets, not just for subsets of genes","It provides automatic local caching, thanks to BiocFileCache.","For a comprehensive user guide to cBioPortalData see: https://waldronlab.io/cBioPortalData/articles/cBioPortalData.html","See also the workshop materials from our webinar which include an intro to cBioPortalData: https://github.com/cBioPortal/2020-cbioportal-r-workshop.","Note that one can point to private authenticated instances like this:"]},{"i":"cbioportalr-recommended","l":"cbioportalR (recommended)","p":["cbioportalR offers easy-to-use functions that allow users to browse and pull data from public or institutional cBioPortal sites without knowledge of web service or Bioconductor infrastructures. The package is tidyverse-compatible. Key package features include:","Comprehensive documentation aimed at helping clinical researchers understand the underlying structure of cBioPortal data","Tutorials for quick API authentication and set up","Functions to pull complete clinical and genomic data by study ID, molecular profile ID, sample list IDs or individual sample ID (e.g. 
get_genetics_by_study(), get_genetics_by_sample())","Functions to navigate and identify patient IDs, sample IDs or study IDs as needed, or infer necessary ID information for queries when not supplied by user.","Helper functions to pull information on gene panels ( get_gene_panel()), or lookup entrez ID ( get_entrez_id()), Hugo Symbol ( get_hugo_symbol()) or common gene aliases ( get_alias()) of genes","Capability to query multiple sample IDs from different studies concurrently","For a detailed tutorial on cbioportalR, see the package website: https://www.karissawhiting.com/cbioportalR/articles/overview-of-workflow.html"]},{"l":"rapiclient","p":["Although we recommend cBioPortalData or cbioportalR for most use cases, it is possible to connect to the API directly using rapiclient:"]},{"i":"cgdsr-will-be-deprecated","l":"CGDSR (will be deprecated)","p":["The CGDS-R package connects to an older version of our web API ( webservice.do). Although we will continue to keep webservice.do running for a while, we can't guarantee the same level of quality as our new API ( cbioportal.org/api) provides. Therefore we recommend that you use cBioPortalData instead."]},{"l":"Python client","p":["There are multiple ways to access the API using Python. 
One can use the bravado package to access the API directly, or use the cbio_py client, which provides a simple wrapper for the API and returns data in a format that is easy to work with."]},{"l":"bravado","p":["Generate a client in Python using bravado like this:","This allows you to access all API endpoints:","For easy tab completion you can add lower cases and underscores:","This example gets you all mutation data for the MSK-IMPACT 2017 study:","For a portal that requires authentication one can use (see Data Access Using Tokens):","A Jupyter notebook with more examples can be found here."]},{"l":"cbio_py","p":["See the cbio_py documentation: https://pypi.org/project/cbio-py/."]}],[{"l":"Deployment","p":["Private instances of cBioPortal are maintained by institutions and companies around the world.","An instance can be deployed using Docker (recommended) or by building and deploying from source. The source code of cBioPortal is available on GitHub under the terms of Affero GPL V3.","This section contains instructions for both of these paths.","Please note that installing a local version requires system administration skills; for example, installing and configuring Tomcat and MySQL. 
With limited resources, we cannot provide technical support on system administration."]}],[{"l":"Architecture Overview","p":["cBioPortal consists of the following components:","backend","MySQL database","REST API written in Java Spring","Redis cache for storing frequently used queries (optional)","validator checks file formats before importing data into the database","frontend built with React, Mobx and Bootstrap","session service for storing user saved data such as virtual studies and groups","REST API written in Java Spring enabling retrieval and writing to the database","MongoDB database","cBioPortal also uses the APIs from various external services to provide more information about a variant"]},{"l":"Backend","p":["The backend is written in Java and connects to a MySQL database to serve a REST API following the OpenAPI specification ( https://www.cbioportal.org/api/). Note that the repo where this lives in ( https://github.com/cBioPortal/cbioportal) also contains Java classes to import data as well as the validator. The backend can be configured to connect to a Redis cache to store database query results for improved performance.","The backend is organized as a multi-module Maven project. See cBioPortal backend code organization."]},{"l":"Validator","p":["The validator checks file formats before importing data into the database. There is a wrapper script metaImport.py that validates the data and subsequently calls the relevant Java classes to import the data."]},{"l":"Session Service","p":["The session service is used for storing user saved data such as virtual studies and groups. See the tutorials section to read more about these features. Session service is a Java app that serves a REST API backed by a Mongo database. The session service is served as a proxy through the cBioPortal backend REST API. The backend is therefore the only component that needs to be able to connect to it. 
The frontend does not connect to it directly."]},{"l":"Frontend","p":["The frontend is a single page app built with React, Mobx and Bootstrap. The data gets pulled from the backend REST API. The frontend is by default included with the backend so no extra setup is required."]},{"l":"External Services","p":["cBioPortal uses the APIs from several external services to provide more information about a variant:","OncoKB","CIVIC","Genome Nexus","G2S","For privacy concerns see the section: A note on privacy."]},{"l":"OncoKB","p":["OncoKB is a precision oncology knowledge base that contains information about the effects and treatment implications of specific cancer gene alterations. See the section OncoKB Data Access for how to configure external OncoKB service."]},{"l":"CIVIC","p":["CIVIC is a community-edited forum for discussion and interpretation of peer-reviewed publications pertaining to the clinical relevance of variants (or biomarker alterations) in cancer. For information on how to deploy this service yourself see: https://github.com/griffithlab/civic-server. It is also possible to disable showing CIVIC in cBioPortal by setting show.civic=false in the portal.properties(See portal.properties reference)."]},{"l":"Genome Nexus","p":["Genome Nexus is a comprehensive one-stop resource for fast, automated and high-throughput annotation and interpretation of genetic variants in cancer. For information on how to deploy this service yourself see: https://github.com/genome-nexus/genome-nexus. For more information on the various annotation sources and versions provided by Genome Nexus see: https://docs.genomenexus.org/annotation-sources."]},{"l":"G2S","p":["G2S (Genome to Structure) maps genomic variants to 3D structures. cBioPortal uses it on the mutations tab to show the variants on a 3D structure. 
For information on how to deploy this service yourself see: https://github.com/genome-nexus/g2s."]},{"l":"A note on privacy","p":["cBioPortal calls these services with variant information from the cBioPortal database. It however does not send over information that links a variant to a particular sample or patient. If this is a concern for your use case we recommend deploying your own versions of these services. See the sections above for links to instructions on how to do this."]}],[{"l":"Hardware Requirements","p":["Hardware requirements will vary depending on the volume of users you anticipate will access your cBioPortal instance and the amount of data loaded in the portal. We run cbioportal.org on an AWS r5.xlarge instance with 32 GB and 4 vCPUs. The public database consumes ~ 50 GB of disk space. The site is visited by several thousands of users a day. For on-premise installation recommendations one can look at the AWS instance type specs:","Platform","instance type","(v)CPUs","RAM(GB)","Storage (GB)","aws","r5.xlarge","4","32","50","on-premise","-","The hardware requirements are much lower when one has only a few users a day. Minimally, 2GB of RAM is needed to run a cBioPortal instance. If you do not plan to import public studies, depending on the size of your private data, 10GB of disk space may be sufficient.","Another possible consideration is caching. The portal can cache responses to requests so that repeated database queries are avoided. On the public cBioPortal deployment we enable this cache and allocate 1GB of additional RAM and 4GB of additional disk space for caching. For directions on configuring caching, see Ehcache Settings"]}],[{"l":"Deploy with Docker"},{"l":"Prerequisites","p":["Docker provides a way to run applications securely isolated in a container, packaged with all its dependencies and libraries. 
To learn more on Docker, kindly refer here: Docker overview.","Make sure that you have the latest version of Docker installed on your machine. Get latest version","Notes for non-Linux systems"]},{"l":"Usage instructions","p":["In this example we use Docker Compose to spin up all the different required containers/services for cBioPortal."]},{"l":"Quick Start","p":["You should now be able to see the cBioPortal website at http://localhost:8080","Import studies with:","Clear persistent data volumes with:"]},{"l":"Comprehensive Start"},{"i":"step-1---run-docker-compose","l":"Step 1 - Run Docker Compose","p":["Download the git repo that has the Docker compose file and go to the root of that folder:","Then download all necessary files (seed data, example config and example study from datahub) with the init script:","Then run:","This will start all four containers (services) defined here. That is:","the mysql database, which holds most of the cBioPortal data","the cBioPortal Java web app, this serves the React frontend as well as the REST API","the session service Java web app. This service has a REST API and stores session information (e.g. what genes are being queried) and user specific data (e.g. saved cohorts) in a separate mongo database","the mongo database that persists the data for the session service","It will take a few minutes the first time to import the seed database and perform migrations if necessary. Each container outputs logs to the terminal. For each log you'll see the name of the container that outputs it (e.g. cbioportal_container or cbioportal_session_database_container). If all is well you won't see any significant errors (maybe some warnings, that's fine to ignore). If all went well you should be able to visit the cBioPortal homepage on http://localhost:8080. 
You'll notice that no studies are shown on the homepage yet:","Go to the next step to see how to import studies."]},{"l":"Notes on detached mode","p":["If you prefer to run the services in detached mode (i.e. not logging everything to your terminal), you can run","In this mode, you'll have to check the logs of each container manually using e.g.:","You can list all containers running on your system with"]},{"i":"step-2---import-studies","l":"Step 2 - Import Studies","p":["To import studies you can run:","This will import the lgg_ucsf_2014 study into your local database. It will take a few minutes to import. After importing, restart the cbioportal web container:","or","All public studies can be downloaded from https://www.cbioportal.org/datasets, or https://github.com/cBioPortal/datahub/. You can add any of them to the ./study folder and import them. There's also a script (./study/init.sh) to download multiple studies. You can set DATAHUB_STUDIES to any public study id (e.g. lgg_ucsf_2014) and run ./init.sh."]},{"l":"Notes on restarting","p":["To avoid having to restart one can alternatively hit an API endpoint. To do so, call the /api/cache endpoint with a DELETE http-request (see here for more information):","The value of the API key is configured in the portal.properties file. You can visit http://localhost:8080 again and you should be able to see the new study."]},{"i":"step-3---customize-your-portalproperties-file","l":"Step 3 - Customize your portal.properties file","p":["The properties file can be found in ./config/portal.properties. Which was set up when running init.sh.","This properties file allows you to customize your instance of cBioPortal with e.g. custom logos, or point the cBioPortal container to e.g. use an external mysql database. See the properties documentation for a comprehensive overview.","If you would like to enable OncoKB see OncoKB data access for how to obtain a data access token. 
After obtaining a valid token use:"]},{"i":"step-4---customize-cbioportal-setup","l":"Step 4 - Customize cBioPortal setup","p":["To read more about the various ways to use authentication and parameters for running the cBioPortal web app see the relevant backend deployment documentation.","On server systems that can easily spare 4 GiB or more of memory, set the -Xms and -Xmx options to the same number. This should increase performance of certain memory-intensive web services such as computing the data for the co-expression tab. If you are using MacOS or Windows, make sure to take a look at these notes to allocate more memory for the virtual machine in which all Docker processes are running."]},{"l":"More commands","p":["For documentation on how to import a study, see this tutorial For more uses of the cBioPortal image, see this file","To Dockerize a Keycloak authentication service alongside cBioPortal, see this file."]},{"l":"Uninstalling cBioPortal"}],[{"l":"Import data with Docker"},{"l":"Import data instructions","p":["This is an example to import a sample study: study_es_0. When trying to import other studies, please follow the same routine:","import gene panels (if applicable, studies without gene panels are assumed to be whole exome/genome)","import study data"]},{"i":"step-1---import-gene-panels","l":"Step 1 - Import gene panels","p":["To import gene panels for your study, please reference the example commands in this file","These are the commands for importing study_es_0 gene panels ( data_gene_panel_testpanel1 and data_gene_panel_testpanel2):"]},{"i":"step-2---import-data","l":"Step 2 - Import data","p":["To import data for your study, please reference the example commands in this file","Command for importing study_es_0 data:","⚠️ after importing a study, remember to restart cbioportal to see the study on the home page. Run docker-compose restart cbioportal.","You have now imported the test study study_es_0. 
Note that this study is included inside the cbioportal container. The process for adding a study that is outside of the container is similar. Just make sure to add the data files in the ./study folder. This folder is mounted as /study/ inside of the container."]},{"l":"Frequently Asked Questions"},{"l":"Gene panel ID is not in database","p":["If you see an error like this when importing the data: ERROR: data_gene_panel_matrix.txt: lines [2, 3, 4, (10 more)]: Gene panel ID is not in database. Please import this gene panel before loading study data.; values encountered: ['TESTPANEL1', 'TESTPANEL2']","please follow the first step to import gene panels (e.g. import data_gene_panel_testpanel1 and data_gene_panel_testpanel2 for study_es_0), then try to import the data again."]},{"l":"Error occurred during validation step","p":["Please make sure the seed database was correctly imported."]},{"i":"study-imported-correctly-but-got-error-when-trying-to-query-something","l":"Study imported correctly, but got error when trying to query something","p":["Remember to restart cbioportal after the data has been imported."]},{"l":"Import GRCh38 data","p":["If you are importing GRCh38 data, please remember to set the reference_genome: hg38 field in the meta_study.txt file. See also cancer study metadata."]}],[{"l":"Example commands"},{"l":"Importing gene panel","p":["Use this command to import a gene panel. Specify the gene panel file by replacing path_to_genepanel_file with the absolute path to the gene panel file. Another option is to add the gene panel files in ./study which is mounted inside the container on /study/."]},{"l":"Importing data","p":["Use this command to validate a dataset. Add the study to the ./study folder. The command will connect to the web API of the container cbioportal-container, and import the study in its associated database. 
Make sure to replace path_to_report_folder with the absolute path where the html report of the validation will be saved.","⚠️ after importing a study, remember to restart cbioportal-container to see the study on the home page. Run docker-compose restart cbioportal."]},{"l":"Using cached portal side-data","p":["In some setups the data validation step may not have direct access to the web API, for instance when the web API is only accessible to authenticated browser sessions. You can use this command to generate a cached folder of files that the validation script can use instead. Make sure to replace path_to_portalinfo with the absolute path where the cached folder is going to be generated.","Then, grant the validation/loading command access to this folder and tell the script to use it instead of the API:"]},{"l":"Inspecting or adjusting the database"},{"l":"Deleting a study","p":["To remove a study, run:","Where study_id is the cancer_study_identifier of the study you would like to remove."]}],[{"l":"Authenticating and Authorizing Users using Keycloak in Docker","p":["This guide describes a way to Dockerise Keycloak along with cBioPortal, for authentication.","First, create an isolated network in which the Keycloak and MySQL servers can talk to one another.","Run a MySQL database in which Keycloak can store its data. This database server will not be addressable from outside the Docker network. Replace path_to_database with the absolute path where the folder kcdb-files will be placed. This folder is used by the database to store its files.","Then run the actual Keycloak server, using this image available from Docker Hub. This will by default connect to the database using the (non-root) credentials in the example above. The server will be accessible to the outside world on port 8180, so make sure to choose a strong administrator password.","The command below uses the default values for MYSQL_DATABASE, MYSQL_USER and MYSQL_PASSWORD(listed in the command above). 
If you wish to change these credentials, specify them in the command below. For instance, if MYSQL_USER in the database container is user, you need to add -e MYSQL_USER=user.","Finally, configure Keycloak and cBioPortal as explained in the Keycloak documentation. Remember to specify port 8180 for the Keycloak server, wherever the guide says 8080.","After configuring Keycloak, set up cBioPortal containers as specified in the documentation. Make sure to update the -Dauthenticate in the docker-compose file to -Dauthenticate=saml."]}],[{"l":"Software Requirements","p":["This page describes various system software required to run the cBioPortal."]},{"l":"MySQL","p":["The cBioPortal software should run properly on MySQL version 5.7.x. Versions higher than 5.7.x can cause an issue while loading the database schema. Minor versions lower than 5.7.x will cause issues with persistent cache invalidation. The software can be found and downloaded from the MySQL website.","On Ubuntu: sudo apt-get install mysql-server"]},{"l":"MongoDB","p":["The session service uses MongoDB 3.6.6"]},{"l":"Java","p":["As of this writing, the cBioPortal can be compiled and run from Java 11 and above. The software can be found and downloaded from the Oracle website.","On Ubuntu: sudo apt-get install default-jdk"]},{"l":"Apache Maven","p":["The cBioPortal source code is an Apache Maven driven project. The software needs to be downloaded and installed prior to building the application from source code. It can be found on the Apache Maven website. 
We are currently using version 3.5.4.","On Ubuntu: sudo apt-get install maven"]},{"l":"Git","p":["You will need a git client to download the cBioPortal source code.","On Ubuntu: sudo apt-get install git"]}],[{"l":"Pre-Build Steps"},{"l":"Get the Latest Code","p":["Make sure that you have cloned the latest code, and make sure you are on the master branch:"]},{"i":"prepare-the-log4jproperties-file","l":"Prepare the log4j.properties File","p":["This file configures logging for the portal. An example file is available within GitHub:","If you don't create your own logback.xml, maven will copy the EXAMPLE file to that location when it builds. If logback.xml already exists, it will just use that. This allows us to give you a working, versioned log config, which you can then override easily.","To modify the logging during tests the same EXAMPLE file can be copied to the relevant test resources folder."]},{"l":"Create the cBioPortal MySQL Databases and User","p":["You must create a cbioportal database and a cgds_test database within MySQL, and a user account with rights to access both databases. This is done via the mysql shell."]}],[{"l":"Building from Source"},{"l":"Building with Maven","p":["While building, you must point the environment variable PORTAL_HOME to the root directory containing the portal source code.","For example, run a command like the following if on macOS:","To compile the cBioPortal source code, move into the source directory and run the following maven command:","After this command completes, you will find a cbioportal.war file suitable for Apache Tomcat deployment in portal/target/. 
It is not necessary to install Tomcat yourself, since a command line runnable version of Tomcat is provided as a dependency in portal/target/dependency/webapp-runner.jar.","However, if you will be deploying to a standalone Tomcat installation, and if you have configured Tomcat to use the Redisson client for user session management, you should expect a clash between the Redisson client being used for session management and the Redisson client which is embedded in the cbioportal.war file for the optional \"redis\" persistence layer caching mode. In this case, you should avoid using the \"redis\" option for the portal property persistence.cache_type and you should prevent the Redisson client from being packaged in cbioportal.war by building with this command instead:"]},{"l":"alternative for standalone tomcat deployments which use redis session management"}],[{"l":"Importing the Seed Database","p":["The next step is to populate your cBioPortal instance with all the required background data sets. This includes for example gene data, ID mappings, and network interactions. Rather than importing each of these data sets individually, we have provided a simple \"seed\" database that you can import directly."]},{"l":"Download the cBioPortal Seed Database","p":["A cBioPortal seed database for human can be found on the datahub page. 
If you are looking for mouse, check this link.","After download, the files can be unzipped by entering the following command:"]},{"l":"Import the cBioPortal Seed Database","p":["Important: Before importing, make sure that you have followed the pre-build steps for creating the cbioportal database (see section \"Create the cBioPortal MySQL Databases and User\").","Import the database schema (/db-scripts/src/main/resources/cgds.sql):","Note that this may currently fail when using the default character encoding on MySQL 8.0 ( utf8mb4); this is why MySQL 5.7 (which uses latin1) is recommended.","Import the main part of the seed database:","Important: Replace seed-cbioportal_RefGenome_vX.Y.Z.sql with the downloaded version of the seed database, such as seed-cbioportal_hg19_v2.3.1.sql or seed-cbioportal_mm10_v2.3.1.sql.","(Human only) Import the Protein Data Bank (PDB) part of the seed database. This will enable the visualization of PDB structures in the mutation tab. Loading this file takes more time than loading the previous files, and is optional for users that do not require PDB structures.","Important: Replace seed-cbioportal_hg19_vX.Y.Z_only-pdb.sql with the downloaded version of the PDB database, such as seed-cbioportal_hg19_v2.3.1_only-pdb.sql.","(optional : support for microRNA genomic profiles) Import constructed gene table records for microRNA genomic profiles. Currently, cBioPortal supports the combined display of copy number alterations (generally reported for microRNA precursors) and expression (generally reported for microRNA mature forms) by adding gene table records which represent the combination of microRNA precursor and microRNA mature form. 
Appropriate aliases are added to the gene_alias table so that both the name of the precursor and the name of the mature form are recognized references to the combination.","After the code has been successfully configured and built, you can import the needed microRNA records by running the following command from the $PORTAL_HOME directory:","Important: Please be aware of the version of the seed database. In the README on datahub, we stated which version of cBioPortal is compatible with the current seed database.","If the database is older than what cBioPortal is expecting, the system will ask you (during startup or data loading) to migrate the database to a newer version. The migration process is described here."]}],[{"l":"Deploying the Web Application"},{"l":"Prepare the global configuration file","p":["The portal is configured using a global configuration file, portal.properties. An example file is available in the src/main/resources folder. Use it as a template to create your own:","For more information about the portal.properties file, see the reference page.","Several scripts of cBioPortal use this portal.properties file to get info like db connection parameters. You can indicate the folder where this file is with an environment variable:","if your properties file is at PORTAL_HOME/portal.properties"]},{"l":"Run cBioPortal Session Service","p":["The cBioPortal app requires session service. For instructions on how to run this without Docker see https://github.com/cBioPortal/session-service#run-without-docker. Once this is working, update the properties file:"]},{"l":"Run the cbioportal backend","p":["To run the app we use webapp-runner. It's a command line version of Tomcat provided by Heroku. All parameters can be seen with:","This runs the app in the foreground. If a port is already in use it will raise an error mentioning that. 
To change the port use the --port flag.","There are three main ways to run the portal: without authentication, with optional login and with required login. All of them require the cBioPortal session service to be running."]},{"l":"Without authentication","p":["In this mode users are able to use the portal, but they won't be able to save their own virtual studies and groups. See the optional login section to enable this."]},{"l":"Optional login","p":["In this mode users can see all the data in the portal, but to save their own groups and virtual studies they are required to log in. This will allow them to store user data in the session service. See the tutorials section to read more about these features.","Google and Microsoft live are supported as optional login currently. Possible values for authenticate are","One needs to set the Google/Microsoft related configurations in the portal.properties file:","See Google's Sign in Documentation to obtain these values.","See Microsoft Sign in Documentation to obtain these values."]},{"l":"Required login","p":["Change CHOOSE_DESIRED_AUTHENTICATION_METHOD to one of googleplus, saml, openid, ad, ldap. The various methods of authentication are described in the Authorization and Authentication section."]},{"l":"Property configuration","p":["The configuration defined in portal.properties can also be passed as command line arguments. 
The priority of property loading is as follows:","-D command line parameters overrides all","${PORTAL_HOME}/portal.properties","portal.properties supplied at compile time","Defaults defined in code","Note that the authenticate property is currently required to be set as a command line argument, it won't work when set in portal.properties(See issue #6109).","Some scripts require a ${PORTAL_HOME}/portal.properties file, so it is best to define the properties there."]},{"l":"Note for Tomcat Deployers","p":["Before we were using webapp-runner, our documentation recommended a system level installed Tomcat. In this case people might have been using dbconnector=jndi instead of the new default dbconnector=dbcp. There is a known issue where setting dbconnector in the properties file does not work (#6148). It needs to be set as a command line argument. For Tomcat this means CATALINA_OPT=-Ddbconnector=jndi."]},{"l":"Verify the Web Application","p":["Lastly, open a browser and go to: http://localhost:8080"]},{"l":"Important","p":["Each time you modify any java code, you must recompile and redeploy the app.","Each time you modify any properties (see customization options), you must restart the app","Each time you add new data, you must restart the app or call the /api/cache endpoint with a DELETE http-request (see here for more information)."]}],[{"l":"Loading a Sample Study","p":["Once you have confirmed that the cBioPortal server is installed, you are ready to import data. Importing a sample study is recommended to verify that everything is working correctly.","The cBioPortal distribution includes a small dummy study, study_es_0, which contains all datatypes supported by cBioPortal. 
This document describes how to import the prerequisites for the sample study and how to import the study itself."]},{"l":"Set the PORTAL_HOME environment variable","p":["Most cBioPortal command-line tools, including the data loading pipeline, expect the environment variable $PORTAL_HOME to point to a folder containing the portal.properties configuration file, as explained during the previous step.","Configure your shell to keep the variable set to the right folder. On GNU/Linux and macOS this usually means appending a line like the following to .bash_profile in your home directory:"]},{"l":"Import Gene Panel for Sample Study","p":["The sample gene panel has to be imported before gene panel study data can be added to the database.","After loading gene panels into the database, please restart Tomcat or call the /api/cache endpoint with a DELETE http-request(see here for more information) so that the validator can retrieve gene panel information from the cBioPortal API.","More details to load your own gene panel and gene set data can be found here: Import Gene Panels."]},{"l":"Validating the Sample Study","p":["First it's useful to validate the study study_es_0, to check if the data is formatted correctly.","To do so, go to the importer folder:","and then run the following command:","If all goes well, you should see the final output message:"]},{"l":"Importing the Sample Study","p":["To import the sample study:","and then run the following command:","You will see a series of output messages, hopefully ending with a status message like this:","After loading the study data, please restart the app or call the /api/cache endpoint with a DELETE http-request(see here for more information)."]}],[{"l":"User Authorization","p":["This step is only required if you intend on running an instance of the portal that supports user authorization.","Two tables need to be populated in order to support user authorization."]},{"i":"table--users","l":"Table: users","p":["This table 
contains all the users that have authorized access to the instance of the portal. The table requires a user's email address, name, and integer flag indicating if the account is enabled.","An example entry would be:","Note, if the ENABLED value is set to 0, the user will be able to login to the portal, but will see no studies.","You need to add users via MySQL directly. For example:"]},{"i":"table--authorities","l":"Table: authorities","p":["This table contains the list of cancer studies that each user is authorized to view. The table requires a user email address and an authority (e.g. cancer study) granted to the user.","Some example entries would be:","The value in the EMAIL column should be the same email address contained in the USER table.","The value in the AUTHORITY column is made of two parts:","The first part is the name of your portal instance. This name should also match the app.name property found in the portal.properties file.","Following a colon delimiter, the second part is the cancer_study_identifier of the cancer study this user has rights to access.","If the user has rights to all available cancer studies, a single entry with the keyword app.name: + \"ALL\" is sufficient (so e.g. \"cbioportal:ALL\").","You need to add users via MySQL directly. For example:","Important Note: The cancer study identifier is not CASE sEnsitive. So it can be UPPER CASE, or just how it is stored in the cancer_study table. Changes to these tables become effective the next time the user logs in."]},{"l":"Using groups","p":["It is also possible to define groups and assign multiple studies and users to a group. You can add a group name to the cancer_study table GROUPS column. This same group name can be used in the AUTHORITY column of the authorities table mentioned above."]},{"i":"example","l":"Example:","p":["We want to create the group \"TEST_GROUP1\" and assign two existing studies to it and give our user 'john.smith@gmail.com' access to this group of studies. 
Steps:","1- Find your studies in table cancer_study","2- Update the GROUPS field, adding your \"TEST_GROUP1\" to it. ⚠️ This is a ; separated column, so if you want a study to be part of multiple groups, separate them with ;.","If GROUPS already has a value (like for study 93 in example above) then add \";TEST_GROUP1\" to ensure existing groups are not overwritten.","3- Check the result:","4- Add the group to user 'john.smith@gmail.com', using app.name:+ \"TEST_GROUP1\" like so:","After next login, the user 'john.smith@gmail.com' will have access to these two studies."]},{"l":"Configuring PUBLIC studies","p":["To enable a set of public studies that should be visible to all users, without the need to configure this for each user in the authorities and users tables, you can set the property always_show_study_group in portal.properties file. For example, you can set:","This will enable the word \"PUBLIC\" to be used in the column GROUPS of the table cancer_study to indicate which studies should be always shown to any authenticated user, regardless of authorization configurations."]},{"i":"example-1","l":"Example:","p":["To reuse the example table above, let's assume the property always_show_study_group is set as indicated above and the cancer_study table contents are set to the following:","In this case, the study brca_tcga will be visible to any authenticated user while the study acc_tcga will be visible only to users configured to be part of GROUPB or TEST_GROUP1"]}],[{"l":"Introduction","p":["The cBioPortal includes support for SAML (Security Assertion Markup Language). This document explains why you might find SAML useful, and how to configure SAML within your own instance of cBioPortal.","Please note that configuring your local instance to support SAML requires many steps. This includes configuration changes and a small amount of debugging. 
If you follow the steps below, you should be up and running relatively quickly, but be forewarned that you may have to do a few trial runs to get everything working.","In the documentation below, we also provide details on how to perform SAML authentication via a commercial company: OneLogin. OneLogin provides a free tier for testing out SAML authentication, and is one of the easier options to get a complete SAML workflow set-up. Once you have OneLogin working, you should then have enough information to transition to your final authentication service."]},{"i":"what-is-saml","l":"What is SAML?","p":["SAML is an open standard that enables one to more easily add an authentication service on top of any existing web application. For the full definition, see the SAML Wikipedia entry.","In its simplest terms, SAML boils down to four terms:","identity provider: this is a web-based service that stores user names and passwords, and provides a login form for users to authenticate. Ideally, it also provides easy methods to add / edit / delete users, and also provides methods for users to reset their password. In the documentation below, OneLogin.com serves as the identity provider.","service provider: any web site or web application that provides a service, but should only be available to authenticated and authorized users. In the documentation below, the cBioPortal is the service provider.","authentication: a means of verifying that a user is who they purport to be. Authentication is performed by the identity provider, by extracting the user name and password provided in a login form, and matching this with information stored in a database. When authentication is enabled, multiple cancer studies can be stored within a single instance of cBioPortal while providing fine-grained control over which users can access which studies. 
Authorization is implemented within the core cBioPortal code, and not the identity provider."]},{"i":"why-is-saml-relevant-to-cbioportal","l":"Why is SAML Relevant to cBioPortal?","p":["The cBioPortal code has no means of storing user name and passwords and no means of directly authenticating users. If you want to restrict access to your instance of cBioPortal, you therefore have to consider an external authentication service. SAML is one means of doing so, and your larger institution may already provide SAML support. For example, at Sloan Kettering and Dana-Farber, users of the internal cBioPortal instances login with their regular credentials via SAML. This greatly simplifies user management."]},{"l":"Setting up an Identity Provider","p":["As noted above, we provide details on how to perform SAML authentication via a commercial company: OneLogin. If you already have an IDP set up, you can skip this part and go to Configuring SAML within cBioPortal.","OneLogin provides a free tier for testing out SAML authentication, and is one of the easier options to get a complete SAML workflow set-up. Once you have OneLogin working, you should then have enough information to transition to your final authentication service. 
As you follow the steps below, the following link may be helpful: How to Use the OneLogin SAML Test Connector.","To get started:","Register a new OneLogin.com Account"]},{"l":"Setting up a SAML Test Connector","p":["\"SAVE\" the app, then select the Configuration Tab.","ACS (Consumer) URL Validator*: ^ http://localhost:8080/cbioportal/saml/SSO$","ACS (Consumer) URL*: http://localhost:8080/saml/SSO","Add at least the parameters:","Audience: cbioportal","Configure these email parameters in the Users menu:","Email (Attribute)","Email (SAML NameID)","Find your user in the \"Users\" menu","Link the SAML app to your user (click \"New app\" on the + icon found on the top right of the \"Applications\" table to do this - see screenshot below):","Login to OneLogin.com.","Recipient: http://localhost:8080/saml/SSO","Search for SAML.","Select the option labeled: OneLogin SAML Test (IdP w/attr).","Under Apps, Select Add Apps.","Under the Configuration Tab for OneLogin SAML Test (IdP w/attr), paste the following fields (this is assuming you are testing everything via localhost)."]},{"l":"Downloading the SAML Test Connector Meta Data","p":["Go to the SSO Tab within OneLogin SAML Test (IdP), find the field labeled: Issuer URL. Copy this URL and download its contents. This is an XML file that describes the identity provider.","then, move this XML file to:","You should now be all set with OneLogin.com. Next, you need to configure your instance of cBioPortal."]},{"l":"Configuring SAML within cBioPortal"},{"l":"Creating a KeyStore","p":["In order to use SAML, you must create a Java Keystore.","This can be done via the Java keytool command, which is bundled with Java.","Type the following:","This will create a Java keystore for a key called: secure-key and place the keystore in a file named samlKeystore.jks. 
You will be prompted for:","keystore password (required, for example: apollo1)","your name, organization and location (optional)","key password for secure-key(required, for example apollo2)","When you are done, copy samlKeystore.jks to the correct location:","If you need to export the public certificate associated within your keystore, run:"]},{"l":"HTTPS and Tomcat","p":["⚠️ If you already have an official (non-self-signed) SSL certificate, and need to get your site running on HTTPS directly from Tomcat, then you need to import your certificate into the keystore instead. See this Tomcat documentation page for more details.","⚠️ An extra warning for when configuring HTTPS for Tomcat: use the same password for both keystore and secure-key. This seems to be an extra restriction by Tomcat."]},{"l":"Modifying configuration","p":["Within portal.properties, make sure that:","Then, modify the section labeled authentication. See SAML parameters shown in example below:","Please note that you will have to modify all the above to match your own settings. saml.idp.comm.binding.type can be left empty if saml.idp.comm.binding.settings=defaultBinding. The saml.logout.* settings above reflect the settings of an IDP that supports Single Logout (hopefully the default in most cases - more details in section below).","In the case that you are running cBioPortal behind a reverse proxy that handles the SSL certificates (such as nginx or traefik), you will have to also specify saml.sp.metadata.entitybaseurl. This should point to https://host.example.com:443. This setting is required such that cBioPortal uses the Spring SAML library appropriately for creating redirects back into cBioPortal.","In addition there is a known bug where redirect from the cBioPortal instance always goes over http instead of https ( https://github.com/cBioPortal/cbioportal/issues/6342). To get around this issue you can pass the full URL including https to the webapp-runner.jar command with e.g. 
--proxy-base-url https://mycbioportalinstance.org."]},{"l":"Custom scenarios","p":["ℹ️ Some settings may need to be adjusted to non-default values, depending on your IDP. For example, if your IDP required HTTP-GET requests instead of HTTP-POST, you need to set these properties as such:","If you need a very different parsing of the SAML tokens than what is done at org.cbioportal.security.spring.authentication.saml.SAMLUserDetailsServiceImpl, you can point the saml.custom.userservice.class to your own implementation:","⚠️ The properties saml.idp.metadata.attribute.email, and saml.idp.metadata.attribute.userName can also vary per IDP. It is important to set these correctly since these are a required field by the cBioPortal SAML parser (that is, if org.cbioportal.security.spring.authentication.saml.SAMLUserDetailsServiceImpl is chosen for property saml.custom.userservice.class).","⚠️ Some IDPs like to provide their own logout page (e.g. when they don't support the custom SAML Single Logout protocol). For this you can adjust the saml.logout.url property to a custom URL provided by the IDP. Also set the saml.logout.local=true property in this case to indicate that global logout (or Single Logout) is not supported by IDP:","⚠️ Some IDPs (e.g. Azure Active Directory) cache user data for more than 2 hours causing cbioportal to complain that the authentication statement is too old to be used. You can fix this problem by setting forceAuthN to true. Below is an example how you can do this with the properties. You can choose any binding type you like. bindings:HTTP-Redirect is given just as an example."]},{"l":"More customizations","p":["If your IDP does not have the flexibility of sending the specific credential fields expected by our default \"user details parsers\" implementation (i.e. 
security/security-spring/src/main/java/org/cbioportal/security/spring/authentication/saml/SAMLUserDetailsServiceImpl.java expects field mail to be present in the SAML credential), then please let us know via a new issue at our issue tracking system, so we can evaluate whether this is a scenario we would like to support in the default code. You can also consider adding your own version of the SAMLUserDetailsService class."]},{"l":"Authorizing Users","p":["Next, please read the Wiki page on User Authorization, and add user rights for a single user."]},{"i":"configuring-the-loginjsp-page-not-applicable-to-most-external-idps","l":"Configuring the Login.jsp Page (not applicable to most external IDPs)","p":["The login page is configurable via the portal.properties properties skin.authorization_message and skin.login.saml.registration_htm. For example in skin.authorization_message you can be set to something like this:","and skin.login.saml.registration_htm can be set to:","You can also set a standard text in skin.login.contact_html that will appear in case of problems:"]},{"l":"Doing a Test Run","p":["You are now ready to go.","Rebuild the WAR file and follow the Deployment with authentication steps using authenticate=saml.","Then, go to: http://localhost:8080/.","If all goes well, the following should happen:","You will be redirected to the OneLogin Login Page.","After authenticating, you will be redirected back to your local instance of cBioPortal.","If this does not happen, see the Troubleshooting Tips below."]},{"l":"Troubleshooting Tips"},{"l":"Logging","p":["Getting this to work requires many steps, and can be a bit tricky. If you get stuck or get an obscure error message, your best bet is to turn on all DEBUG logging. This can be done via src/main/resources/logback.xml. For example:","Then, rebuild the WAR, redeploy, and try to authenticate again. 
Your log file will then include hundreds of SAML-specific messages, even the full XML of each SAML message, and this should help you debug the error."]},{"l":"Seeing the SAML messages","p":["Another tool we can use to troubleshoot is SAML tracer ( https://addons.mozilla.org/en-US/firefox/addon/saml-tracer/). You can add this to Firefox and it will give you an extra menu item in \"Tools\". Go through the login steps and you will see the SAML messages that are sent by the IDP."]},{"l":"Obtaining the Service Provider Meta Data File","p":["By default, the portal will automatically generate a Service Provider (SP) Meta Data File. You may need to provide this file to your Identity Provider (IP).","You can access the Service Provider Meta Data File via a URL such as:","http://localhost:8080/saml/metadata"]}],[{"l":"Authenticating Users via LDAP","p":["To connect cBioPortal to an external user database such as Active Directory will require the installation of Keycloak. Please read the Wiki page on Authenticating and Authorizing Users via Keycloak for information on how to connect the cBioPortal with Keycloak. You can also read how to connect Keycloak to Active Directory via LDAP on the User Storage Federation webpage of the Keycloak website."]}],[{"l":"Authenticating and Authorizing Users via Keycloak","p":["⚠️ This documentation is for keycloak =\" driver annotation sources in the settings menu of Results View can be hidden by turning off the following property (default: true):","\"COSMIC>=\" driver annotation sources in the settings menu of Results View can be hidden by turning off the following property (default: true):"]},{"l":"CIViC integration","p":["CIViC integration can be turned on or off with the following property (default: true):","The CIViC API url is set to https://civic.genome.wustl.edu/api/ by default. It can be overridden using the following property:"]},{"l":"Genome Nexus Integration","p":["Genome Nexus provides annotations of mutations in cBioPortal. 
The mutations tab relies heavily on the Genome Nexus API, therefore that tab won't work well without it. By default cBioPortal will use the public Genome Nexus API, such that no extra configuration is necessary."]},{"l":"Genome Build","p":["Genome Nexus supports both GRCh37 and GRCh38, but support for the latter is limited. Several annotation sources served by Genome Nexus might not have official GRCh38 support yet i.e. OncoKB, CIViC, Cancer Hotspots, My Cancer Genome and 3D structures. Although most of the time the canonical transcript for a gene will be the same between GRCh37 and GRCh38 there might be some that cause issues. In addition the complete integration of cBioPortal with Genome Nexus' GRCh38 is not complete yet. That is cBioPortal currently only connects to one Genome Nexus API by default (the GRCh37 one), so it's not possible to have multiple genome builds in one instance of cBioPortal and get the correct annotations from Genome Nexus for both. Currently only the mutation mapper tool page is able to handle both."]},{"l":"Properties","p":["By default the Genome Nexus API url is set to https://v1.genomenexus.org/, which uses GRCh37. It can be overridden using the following property:","Genome Nexus provides a set of mappings from Hugo genes names to Ensembl transcript IDs. There are two mappings: mskcc and uniprot. You can read more about the difference between those in the Mutation Data Annotation Section. The default is currently uniprot, but we recommend new installers to use mskcc and people with older installations to consider migrating. The property can be changed with:","The mutation mapper tool page can annotate GRCh38 coordinates. By default it uses https://grch38.genomenexus.org. 
It can be overridden by setting:","The GRCh38 annotation in mutation mapper can be hidden by setting show.mutation_mappert_tool.grch38=false, by default it's set to true."]},{"l":"MDACC Heatmap Integration","p":["MDACC Heatmap integration (button in OncoPrint heatmap dropdown and tab on Study page) can be turned on or off by setting the following property:"]},{"l":"OncoPrint","p":["The default view in OncoPrint (\"patient\" or \"sample\") can be set with the following option. The default is \"patient\".","Configuration of tracks that will be visible by default in the oncoprint. It points to a JSON file on the classpath."]},{"l":"Custom annotation of driver and passenger mutations","p":["cBioPortal supports 2 formats to add custom annotations for driver and passenger mutations.","cbp_driver: This will define whether a mutation is a driver or not.","cbp_driver_tiers: This can be used to define multiple classes of driver mutations.","These data formats are described in the cBioPortal MAF specifications.","Enabling custom annotations in the OncoPrint","To enable functionality for one or both types of custom annotations, enter values for the following properties. These labels will appear in the OncoPrint's \"Mutation color\" menu.","Automatic selection of OncoKB, hotspots and custom annotations","OncoKB and Hotspots are by default automatically selected as annotation source, if show.oncokb and show.hotspots are set to true. To add automatic selection of custom driver or custom driver tiers annotations, set the respective property to true. Default is false.","If you want to disable the automatic selection of OncoKB and hotspots as annotation source, set these properties to false:","If you want to enable oncoprint heatmap clustering by default, set this property to true:","Automatic hiding of variants of unknown significance (VUS)","By default, the selection box to hide VUS mutations is unchecked. If you want to automatically hide VUS, set this property to true. 
Default is false."]},{"l":"Gene sets used for gene querying","p":["To change the gene sets used for gene querying, create a JSON file and add gene sets, following the format specified in the examples below. Set the path to this file (e.g. file:/cbioportal/custom_gene_sets.json) in the following property and restart Tomcat to apply the update. The default gene sets will be replaced by the ones in custom_gene_sets.json."]},{"l":"Example with gene names","p":["In this example, two gene sets will appear in the query page, under the names \"Prostate Cancer: AR Signaling\" and \"Prostate Cancer: AR and steroid synthesis enzymes\"."]},{"l":"Example with specific alterations","p":["In this example, only one gene set will appear in the query page, under the name \"Genes with alterations\", which will add the different genetic alterations stated below in the query box."]},{"l":"Example with merged gene tracks","p":["In this example, only one gene set will appear in the query page, under the name \"BRCA genes test\", containing the merged gene track called \"BRCA genes\".","This gene set will add the following in the query box:"]},{"l":"Cache Settings","p":["cBioPortal is supported on the backend with Ehcache or Redis. These caches are configurable from within portal.properties through the following properties.","The cache type is set using persistence.cache_type. Valid values are no-cache, redis(redis), ehache-heap(ehcache heap-only), ehache-disk(ehcache disk-only), and ehache-hybrid(ehcache disk + heap). By default, persistence.cache_type is set to no-cache which disables the cache. When the cache is disabled, no responses will be stored in the cache.","⚠️ the 'redis' caching option will likely cause a conflict when installing the portal in a Tomcat installation which uses redisson for session management. 
If you plan to deploy cbioportal to such a system, avoid the 'redis' caching option for persistence.cache_type and be sure to build cbioportal.war with the maven option -Dexclude-redisson(see Building with Maven).","Logged metrics and additional information such as cache size and cached keys are available through an optional endpoint. The optional endpoint is turned off by default but can be turned on by setting cache.statistics_endpoint_enabled to true.","The cache statistics endpoint is hidden on the api page; users must directly access the URL to view the response. The cache statistics endpoint can be accessed in the following ways.","For a list of all keys in the cache:","For a list of counts of keys in cache per repository class:","For general statistics about the cache such as memory usage (not currently implemented for Redis):","WARNING: It must be noted that since cache statistics endpoint returns data on cache keys, the endpoint may expose otherwise hidden database query parameters such as sample identifiers, study names, etc. Generally, it is recommended that the endpoint only be turned on during cache-related development for testing. Deployers of a protected portal where users only have authorities to a subset of studies should carefully consider whether or not to turn on the cache statistics endpoint, as it does not filter the results.","For more information on how caching is implemented in cBioPortal refer to the Caching documentation."]},{"l":"Redis","p":["To cache with Redis set persistence.cache_type to redis.","To setup the Redis cache servers the following properties are required:","If you are running one redis instance for multiple instances of cBioPortal, one can use the properties redis.name and redis.database to avoid clashes. If you are running only one instance of cBioPortal any value for name/database will do.","There are also some optional parameters:","redis.clear_on_startup: If true, the caches will clear on startup. 
This is important to do to avoid reading old study data from the cache. You may want to turn it off and clear redis yourself if you are running in a clustered environment, as you'll have frequent restarts that do not require you to clear the redis cache. redis.ttl_mins: The time to live of items in the general cache, in minutes. The default value is 10000, or just under 7 days.","For more information on Redis, refer to the official documentation here"]},{"l":"Ehcache","p":["To cache with Ehcache set persistence.cache_type to ehache-heap(ehcache heap-only), ehache-disk(ehcache disk-only), or ehache-hybrid(ehcache disk + heap).","Ehcache initializes caches using a template found in an Ehcache xml configuration file. When caching is enabled, set ehcache.xml_configuration to the name of the Ehcache xml configuration file. The default provided is ehcache.xml; to change the cache template, directly edit this file. Alternatively, you can create your own Ehcache xml configuration file, place it under /persistence/persistence-api/src/main/resources/ and set ehcache.xml_configuration to /[Ehcache xml configuration filename].","If the cache is configured to use disk resources, users must make a directory available and set it with the ehcache.persistence_path property. Ehcache will create separate directories under the provided path for each cache defined in the ehcache.xml_configuration file.","Cache size must be set for heap and/or disk depending on which are in use; Ehcache requires disk size to be greater than heap size in a hybrid configuration. Zero is not a supported size and will cause an exception. Units are in megabytes. Default values are provided. The general repository cache is specified to use 1024MB of heap and 4096MB of disk. The static repository cache is specified to use 30MB of heap and 32MB of disk. 
For installations with increased traffic or data, cache sizes can be increased to further improve performance.","For more information on Ehcache, refer to the official documentation here"]},{"i":"evict-caches-with-the-apicache-endpoint","l":"Evict caches with the /api/cache endpoint","p":["DELETE http requests to the /api/cache endpoint will flush the cBioPortal caches, and serves as an alternative to restarting the cBioPortal application.","By default the endpoint is disabled. The endpoint can be enabled by setting:","Access to the endpoint is not regulated by the configured user authorization mechanism. Instead, an API key should be passed with the X-API-KEY header. The accepted value for the API key can be configured by setting (for example):"]},{"l":"Delegate user-authorization cache to Spring-managed cache","p":["For evaluation of user permissions cBioPortal uses a user-authorization cache that is populated at startup. By setting the cache.cache-map-utils.spring-managed property to true this cache will be managed by the Spring caching solution such as EHCache or Redis. For more extended information, see here"]},{"l":"Enable GSVA functionality","p":["GSVA functionality can be enabled by uncommenting this line (and making sure it is set to true):"]},{"l":"Set default thresholds for geneset hierarchy"},{"l":"Collapses the tree widget of the geneset hierarchy dialog on initialization","p":["By default, the tree is expanded (property value is false)."]},{"l":"Cross study expression and protein data","p":["By default we hide expression data for multi-study queries as they are usually not normalized across studies. 
For the public cBioPortal for instance, only TCGA Pancan Atlas studies expression data has been normalized.","If you know the expression data in your instance is comparable, or is comparable for a subset of studies, you can configure a rule as follows.","The value of this property can be boolean (true|false) or a javascript function which executes at runtime and is passed the list of study objects being queried by the user and evaluates whether expression data can be safely displayed."]},{"l":"Combined Study View Summary Limits"},{"l":"Background","p":["A limit is added to prevent poor performance of Study View when selecting too large sample numbers."]},{"i":"properties-1","l":"Properties","p":["studyview.max_samples_selected: Limit is disabled when not set"]},{"l":"Behavior","p":["When these limits are exceeded the \"Explore Selected Studies\" button will be disabled on the Study View Page."]},{"l":"Request Body Compression"},{"i":"background-1","l":"Background","p":["Some REST endpoints that the cBioPortal frontend uses have request bodies that scale as your dataset increases. In portals where users commonly query more than 100,000 samples, we found that some of these request bodies could get as large as 20 Mb. These large request bodies pose a significant problem for users with poor upload speeds - some users experienced upload times of more than five minutes for these requests. Request body compression is our temporary solution to this problem. When this feature is toggled on, we compress the request bodies of a few problematic endpoints."]},{"i":"properties-2","l":"Properties","p":["There are two portal.property values related to this feature:","enable_request_body_gzip_compression: when true, the feature will be enabled.","request_gzip_body_size_bytes: the maximum allowable unzipped request body in bytes. Defaults to 80000000 (80 Mb)."]},{"i":"behavior-1","l":"Behavior","p":["This is a nonbreaking change. 
Any consumers of the cBioPortal API you have that send requests with uncompressed request bodies will continue to work, regardless of whether you turn this feature on or off.","If you turn this feature on, the cBioPortal API will now be able to handle any request with a gzipped request body, provided:","It is a POST request.","It has a Content-Encoding: gzip header."]},{"l":"Reasons to Enable This Feature","p":["You have studies with tens of thousands of samples.","You have users with poor upload speeds (< 1mb up)."]},{"l":"Reasons to Disable This Feature","p":["It is harder to debug gzipped requests","Chrome's copy request as CURL will not work.","The compressed request body is not human-readable.","It is a potential vector for denial of memory attacks.","Any request that has a body that takes significantly more space in memory than it does in the request body is potentially dangerous. We try to mitigate this by limiting the size of the unzipped request body via the request_gzip_body_size_bytes property, but at a fundamental level, this is still a concern.","Along these lines, if you do enable this feature, setting request_gzip_body_size_bytes to an arbitrarily large number would be unwise.","This is not a cure-all for performance issues","Most requests the cBioPortal makes do not have large request bodies, so most requests will not be compressed, and will see no performance improvement.","Users with good upload speeds will see minimal performance improvements, as their upload speed is not a bottleneck."]},{"i":"datasets-tab-study-download-links","l":"DataSets Tab (Study Download Links)"},{"i":"background-2","l":"Background","p":["The DataSets tab has the ability to create a download button that allows users to quickly download \"raw\" public studies."]},{"i":"properties-3","l":"Properties","p":["study_download_url: when set, the feature will be enabled"]},{"i":"behavior-2","l":"Behavior","p":["For private instances that want to replicate the public-portal they 
first must set up their studies they want available for download in a similar format to what is described in the Example section below. The studies are located on the public-portal at https://cbioportal-datahub.s3.amazonaws.com/. Then there is a study_list.json defined that lists the studies that can be downloaded. The studies to be downloaded need to be compressed with the extension tar.gz"]},{"l":"Example","p":["We have set study_download_url property to https://cbioportal-datahub.s3.amazonaws.com/","study_list.json resides https://cbioportal-datahub.s3.amazonaws.com/study_list.json","[ acbc_mskcc_2015, acc_2019] Example of contents","acbc_mskcc_2015.tar.gz resides https://cbioportal-datahub.s3.amazonaws.com/acbc_mskcc_2015.tar.gz"]},{"l":"Prioritized studies on study selector view","p":["By default, the studies loaded into a local cBioPortal instance are organized based on their cancer type (i.e. Breast >> Other).","The value of this variable will create a custom category with studies on the top of the study selector view. The format for the string should be category1#study1a,study1b,study1c;category2#study2 (e.g., PanCancer Studies#msk_impact_2017), where the category can be any string and the study should be the study ID of the required uploaded study."]}],[{"l":"Backend Caching","p":["cBioPortal provides the option of caching information on the backend to improve performance. Without caching, every time a request is received by the backend, a query is sent to the database system for information, and the returned data is processed to construct a response. This may lead to performance issues as the entire process can be rather costly, especially for queries on larger studies. With caching turned on, query responses can be taken directly from the cache if they have already been constructed. They would only be constructed for the initial query."]},{"l":"Cache Configuration","p":["The portal is configured to use Ehcache or Redis for backend caching. 
Ehcache supports a hybrid (disk + heap), disk-only, and heap-only mode. Redis stores the cache in memory and periodically writes the updated data to disk. Cache configuration is specified inside portal.properties(more information here)."]},{"l":"Creating additional caches","p":["The default configuration initializes two separate caches; however, you may wish to introduce new caches for different datatypes. Please see the Redis and Ehcache sections to see how to set up a new cache in whichever system you are using."]},{"l":"Redis","p":["Cache initialization is handled inside the CustomRedisCachingProvider. To create additional caches (e.g creating a cache specifically for clinical data), new code must be added to the CustomRedisCachingProvider.","Within the CustomRedisCachingProvider, create your new cache using the CacheManager. The appName must be prepended to your cache name.","You also need to create a new cache resolver in RedisConfig.java:","The @Cacheable annotation must also be added (or adjusted) to function declarations to indicate which functions are to be cached. Those might look like this example:","For more information on linking caches to functions, refer to the documentation here."]},{"l":"Ehcache","p":["Within the CustomEhcachingProvider, initialize a new ResourcePoolsBuilder for the new cache and set the resources accordingly.","After initialzing the ResourcePoolsBuilder, create a CacheConfiguration for the new cache using the new ResourcePoolsBuilder just created.","Finally, add the new CacheConfiguration to the map of managed caches with a name for the cache.","You also need to create a new cache resolver in EhCacheConfig.java:","The @Cacheable annotation must also be added (or adjusted) to function declarations to indicate which functions are to be cached. Those might look like this example:","Additionally, new properties for setting cache sizes should be added to portal.properties and loaded into the CustomEhcachingProvider. 
Alternatively, values may be hardcoded directly inside CustomEhcachingProvider.","For more information on cache templates and the Ehcache xml configuration file, refer to the documentation here.","For more information on linking caches to functions, refer to the documentation here."]},{"l":"User-authorization cache","p":["In addition to the above-mentioned Spring-managed caches, cBioPortal maintains a separate cache that holds references to sample lists, molecular profiles and cancer studies. This user-authorization cache is used to establish whether a user has access to the data of a particular sample list or molecular profile based on study-level permissions.","By default, the user-authorization cache is implemented as a HashMap that is populated when cBioPortal is started. This implementation allows for very fast response times of user-permission evaluation.","The user-authorization cache can be delegated to the Spring-managed caching solution by setting the cache.cache-map-utils.spring-managed to true. Depending on the implementation, this may add a delay to any data request that is caused by the additional consultation of the external cache. This configuration should only be used where a central caching solution is required or no instance/container-specific local caches are allowed. For example, cache eviction via the api/cache endpoint in a Kubernetes deployment of cBioPortal where multiple pods/containers that represent a single cBioPortal instance is possible with a Spring-managed user-authorization cache because a call to this endpoint in a single pod/container invalidates Redis caches for the entire deployment thereby preventing inconsistent state of user-authorization caches between pods."]},{"l":"Cache eviction","p":["When the database is updated (e.g new studies loaded, existing study updated, new gene panel imported) the caches of a cBioPortal instance should be updated. One way is to restart the cBioPortal spring application. 
When using Redis, this will work only when redis.clear_on_startup is not set to false.","Alternatively, caches can be cleared (a.k.a. evicted) by calling the /api/cache endpoint. Advantage of the cache eviction end point is that user-sessions remain undisturbed since the portal instance is not restarted. By default the cache eviction enpoint is disabled and can be enabled by setting cache.endpoint.enabled to true. The endpoint is secured by a secret API key that can be customized with the cache.endpoint.api-key property. Caches are evicted by making a DELETE request to the endoint while passing the API key in the X-API-KEY header. When using curl use the following command (replace the API key for the value configured in portal.properties):"]},{"l":"Cache eviction after cancer study updates","p":["When a study is added, deleted or updated, a more selective cache eviction strategy is possible, where only affected cached data is evicted. This more selective cache eviction is triggered by calling the /api/cache/{studyId} endpoint where is the cancer_study_identifier stated in the meta-study.txt file. When using curl use the following command after update of a study with study identifier my_cancer_study(replace the API key for the value configured in portal.properties):",":important: This endpoint can ony be used when adding/deleting/updating a study. When data related to gene panels or gene sets is updated, all caches should be evicted with a call to /api/cache."]},{"i":"how-does-study-specific-cache-eviction-work","l":"How does study-specific cache eviction work?"},{"l":"Structure of cache keys","p":["The caching solutions integrated with cBioPortal (EHCache and Redis) store data as key-value pairs. Each key represents a method call signature that contains the Java class name, the method name and a serialized representation of all method arguments. 
For instance this is the key for a call to the CancerTypMyBatisRepository.getAllCancerTypes() method with arguments [ SUMMARY, 10000000, 0, null, ASC]:","Cached data that relates to a specific study can be recognized by the occurrence of the cancer study identifier anywhere in the method arguments. The study identifier can occur in the method arguments because it is passed as argument itself, like here for a study with identifier study_es_0:","Alternatively the study identifier is present as the prefix of referenced study entities. For example, this is the request for all molecular profiles:"]},{"l":"Cache eviction rules","p":["When a study is added, deleted or updated, all caches are evicted where the respective key meets any of these requirements:","The key contains the cancer study identifier of the study that is added, deleted or updated.","The key does not contain the cancer study identifier of any study present in the database.","The rationale behind rule 1. is that when a key references data for the affected study it points to potentially outdated data and its associated cache should be evicted. The rationale behind rule 2. is that any key that does not reference data for any study potentially points to data derived from all studies in the database, including the affected study, and its associated cache should be evicted. Because not every key without study identifiers necessarily points to study related data, this rule is overly broad. At the moment of this writing we were unable to implement reliable methods that would further specify such keys. This might be a start-off point for future optimizations."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["Study view page is fully responsive, it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. 
Higher priority will promote chart closer to the left-top.","In order to improve the layout, we added a layout algorithm layer. The study view page is using grid layout. All charts will be put into 2-dimensional systems. For example, pie chart, by default, takes 1 block and bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in(authenticated) users, charts layout is saved to users profile i.e, whenever user tries to re-visits the same url, previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to few charts, but as long as you assign a priority in the database except than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. The higher the final (numeric) score, the higher priority assigned.","This is combination of DFS_MONTH and DFS_STATUS","This is combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1.(Currently disables charts for single clinical attributes only)","To promote certain chart in study view, please increase priority in the database to a certain number. The higher the score, the higher priority it will be displayed in the study view. 
If you want to hide chart, please set the priority to 0. For combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["Study view page is fully responsive, it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. Higher priority will promote chart closer to the left-top.","In order to improve the layout, we added a layout algorithm layer. The study view page is using grid layout. All charts will be put into 2-dimensional systems. For example, pie chart, by default, takes 1 block and bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in(authenticated) users, charts layout is saved to users profile i.e, whenever user tries to re-visits the same url, previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to few charts, but as long as you assign a priority in the database except than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. 
The higher the final (numeric) score, the higher priority assigned.","This is combination of DFS_MONTH and DFS_STATUS","This is combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1.(Currently disables charts for single clinical attributes only)","To promote certain chart in study view, please increase priority in the database to a certain number. The higher the score, the higher priority it will be displayed in the study view. If you want to hide chart, please set the priority to 0. For combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Introduction","p":["Using OncoKB does not require a data access token. But the instance you are connecting to only includes biological information by default. If you want to include tumor type summary, therapeutic levels and more, please consider obtaining a license from OncoKB."]},{"l":"How to obtain an OncoKB license","p":["Please review OncoKB terms","Please request for data access","You can find your token in your Account Settings after login."]},{"l":"Set up cBioPortal to include full OncoKB content","p":["Following properties can be edited in the portal.properties file or set in system variables if you are using docker.","show.oncokb should be set to true","oncokb.token should be set to a valid OncoKB access token value","oncokb.public_api.url should be set to https://www.oncokb.org/api/v1","Thank you for supporting future OncoKB development."]},{"l":"Include MSI-H and TMB-H annotation","p":["If you want to include the MSI-H and TMB-H annotation on patient view, please follow the instruction to import required clinical data. For MSI-H, a clinical attribute MSI_TYPE with value Instable is required. 
For TMB-H, a clinical attribute TMB_SCORE with value >=10 is required."]},{"l":"Disable OncoKB Service","p":["Please set show.oncokb to false in portal.properties or in system variables if you are using docker."]}],[{"l":"Data Loading"},{"l":"Introduction","p":["This page is the starting point for data loading. The General Overview section below contains all the required steps to get you started."]},{"l":"General Overview","p":["Getting your study data into cBioPortal requires four steps:","Setting up the validator","Preparing your study data","Validating your study data","Loading your study data"]},{"l":"Setting up the validator"},{"l":"Installation","p":["If you have a git clone of cBioPortal, the relevant scripts can be found in the folder: your_cbioportal_dir/core/src/main/scripts/importer"]},{"l":"Dependencies","p":["The scripts run in Python 3.4 or newer, and they require the modules requests and pyyaml. You can use this command to install those modules:","If you want the scripts to be able to generate html reports (recommended way for reading the validation errors, if any), then you will also need to install Jinja2. You can use this command:"]},{"l":"Preparing Study Data","p":["A study to be loaded in cBioPortal can basically consist of a directory where all the data files are located. Each data file needs a meta file that refers to it and both files need to comply to the format required for the specific data type. The format and fields expected for each file are documented in the File Formats page. Below is an example of the files in such a directory."]},{"l":"Rules","p":["There are just a few rules to follow:","meta_study, meta_clinical and respective clinical data file are the only mandatory files.","cancer type files can be mandatory if the study is referring to a cancer type that does not yet exist in the DB.","meta files can be named anything, as long as it starts or ends with name 'meta'. E.g. 
meta_test, meta.test, test.meta are all fine; metal_test and metastudy are wrong.","data files can be named anything and are referenced by a property data_filename set in the meta file."]},{"l":"Validating your study data","p":["Once all files are in place and follow the proper format, you can validate your files using the dataset validator script.","The validation can be run standalone, but it is also integrated into the metaImport script, which validates the data and then loads it if validation succeeds."]},{"l":"Loading Data","p":["To load the data into cBioPortal, the metaImport script has to be used. This script first validates the data and, if validation succeeds, loads the data."]},{"l":"Removing a Study","p":["To remove a study, the cbioportalImporter script can be used."]},{"l":"Example studies","p":["Examples for the different types of data are available on the File Formats page. The Provisional TCGA studies, downloadable from the Data Sets section are complete studies that can be used as reference when creating data files."]}],[{"l":"Downloads","p":["This page describes the various files available for download. The first section is targeted towards users of cBioPortal. The second section towards maintainers of cBioPortal instances."]},{"l":"User Downloads","p":["There are several ways in which one can download data from cBioPortal including manual and programmatic approaches. See options outlined below."]},{"l":"Datasets Page","p":["A zip file for each study on cbioportal.org can be download from the Datasets Page. One can also use the R client cBioPortalData to programmatically download all of these files."]},{"l":"Datahub","p":["The files for each study are also available from our datahub repository. This is basically the extracted version of the zip files in the Datasets Page. 
Note that this is a git LFS repo so if you are familiar with git you might prefer using this option."]},{"l":"API and API Clients","p":["Besides downloading all the study data one can also request slices of the data using the API. A slice of the data could e.g. be \"give me all the mutation data for one patient\" or \"get me all EGFR mutations for a particular group of samples\". There are API clients available in a variety of languages including bash, R and Python. See for more information the API documentation."]},{"l":"Instance Maintainer Downloads","p":["As an instance maintainer of cBioPortal there are a variety of files that might be helpful. See below."]},{"l":"Study staging files","p":["Staging files for the studies on cbioportal.org can be download from the Datasets Page. These studies can be validated and loaded in a local cBioPortal instances using the validator and importer. Any issues with a downloaded study can be reported on cBioPortal DataHub.","Example studies","TCGA Provisional studies often contain many different data types. These are excellent examples to use as reference when creating your own staging files. A detailed description on supported data types can be found in the File Formats documentation."]},{"l":"Complete cBioPortal database","p":["A MySQL database dump of the complete cbioportal.org database can be found here: http://download.cbioportal.org/mysql-snapshots/public-portal-dump.latest.sql.gz"]},{"l":"Seed Database","p":["The seed database is a MySQL dump for seeding a new instance of the cBioPortal. Instructions for loading the seed database can be found here. The seed database for human can be downloaded from cBioPortal Datahub. A mouse version can be found here."]}],[{"l":"Using the dataset validator","p":["To facilitate the loading of new studies into its database, cBioPortal provides a set of staging files formats for the various data types. 
To validate your files you can use the dataset validator script."]},{"l":"Running the validator","p":["To run the validator first go to the importer folder cbioportal_source_folder/core/src/main/scripts/importer and then run the following command:","This will tell you the parameters you can use:","For more information on the --portal_info_dir option, see Offline validation below. If your cBioPortal is not using hg19, you have to specify the reference_genome field in your meta_study.txt.","For more information, see Validation of non-human data.","When running the validator with parameter -r the validator will run the validation of the clinical data it will ignore all failing checks about values in the headers of the clinical data file.","When running the validator with parameter -m the validator will run the validation of the specific MAF file checks for the mutation file in strict maf check mode. This means that when the validator encounters these validation checks it will report them as an error instead of a warning."]},{"i":"example-1-test-study_es_0","l":"Example 1: test study_es_0","p":["As an example, you can try the validator with one of the test studies found in cbioportal_source_folder/core/src/test/scripts/test_data. Example, assuming port 8080 and using -v option to also see the progress:","Results in:","When using the -html option, a report will be generated, which looks like this for the previous example: Screenshot of a successful validation report"]},{"i":"example-2-test-study_es_1","l":"Example 2: test study_es_1","p":["More test studies for trying the validator ( study_es_1 and study_es_3) are available in cbioportal_source_folder/core/src/test/scripts/test_data. Example, assuming port 8080 and using -v option:","Results in:","And respective HTML report: Screenshot of an unsuccessful validation report"]},{"l":"Offline validation","p":["The validation script can be used offline, without connecting to a cBioPortal server. 
The tests that depend on information specific to the portal (which clinical attributes and cancer types have been previously defined, and which Entrez gene identifiers and corresponding symbols are supported), will instead be read from a folder with .json files generated from the portal."]},{"i":"example-3-validation-with-a-portal-info-folder","l":"Example 3: validation with a portal info folder","p":["To run the validator with a folder of portal information files, add the -p/--portal_info_dir option to the command line, followed by the path to the folder:"]},{"i":"example-4-generating-the-portal-info-folder","l":"Example 4: generating the portal info folder","p":["The portal information files can be generated on the server, using the dumpPortalInfo script. Go to cbioportal_source_folder/core/src/main/scripts, make sure the environment variables $JAVA_HOME and $PORTAL_HOME are set, and run dumpPortalInfo.pl with the name of the directory you want to create:"]},{"i":"example-5-validating-without-portal-specific-information","l":"Example 5: validating without portal-specific information","p":["Alternatively, you can run the validation script with the -n/--no_portal_checks flag to entirely skip checks relating to installation-specific metadata. Be warned that files succeeding this validation may still fail to load (correctly)."]},{"l":"Validation of non-human data","p":["When importing a study with a reference genome other than hg19/GRCh37, this should be specified in the meta_study.txt file, next to the reference_genome field. Supported values are hg19, hg38 and mm10.","cBioPortal is gradually introducing support for mouse. 
If you want to load mouse studies and you have to set up your database for mouse.","As an example, the command for the mouse example using the three parameters is given:"]},{"l":"Running the validator for multiple studies","p":["The importer folder cbioportal_source_folder/core/src/main/scripts/importer also contains a script for running the validator for multiple studies:","The following parameters can be used:","Parameters --url_server, --portal_info_dir, --no_portal_checks and --portal_properties are equal to the parameters with the same name in validateData.py. The script will save a log file with validation output ( log-validate-studies.txt) and output the validation status from the input studies:"]},{"i":"example-1-root-directory-parameter","l":"Example 1: Root directory parameter","p":["Validation can be run for all studies in a certain directory by using the --root-directory parameter. The script will append each folder in the root directory to the study list to validate:"]},{"i":"example-2-list-of-studies-parameter","l":"Example 2: List of studies parameter","p":["Validation can also be run for specific studies by using the --list-of-studies parameter. The paths to the different studies can be defined and seperated by a comma:"]},{"i":"example-3-combination-root-directory-and-list-of-studies-parameter","l":"Example 3: Combination root directory and list of studies parameter","p":["Validation can also be run on specific studies in a certain directory by combining the --root-directory and --list-of-studies parameter:"]},{"i":"example-4-html-folder-parameter","l":"Example 4: HTML folder parameter","p":["When HTML validation reports are desired, an output folder for these HTML files can be specified. This folder does not have to exist, the script can create the folder. The HTML validation reports will get the following name: study_name-validation.html. 
To create HTML validation reports for each study the --html-folder parameter needs to be defined:"]}],[{"l":"Using the metaImport script"},{"l":"Importing Data into cBioPortal","p":["The metaImport script should be used to automate the process of validating and loading datasets. It also has some nice features like an extra option to only load datasets that completely pass validation (i.e. with no errors, while warnings can be explicitly allowed by the user)."]},{"l":"Running the metaImport Script","p":["To run the metaImport script first go to the importer folder your_cbioportal_dir/core/src/main/scripts/importer and then run the following command:","This will tell you the parameters you can use:"]},{"l":"Example of Importing a study","p":["Export PORTAL_HOME as explained here, e.g.","and then run (this simple command only works if your cBioPortal is running at http://localhost/cbioportal - if this is not the case, follow the advanced example):"]},{"l":"Advanced Example","p":["This example imports the study to the localhost, creates an html report and shows status messages.","By adding -o, warnings will be overridden and import will start after validation."]},{"i":"development--debugging-mode","l":"Development / debugging mode","p":["For developers and specific testing purposes, an extra script, cbioportalImporter.py, is available which imports data regardless of validation results. 
Check this page for more information on how to use it."]}],[{"i":"#","p":["Arm Level CNA Data","Cancer Study","Cancer Type","Case Lists","Clinical Data","Continuous Copy Number Data","Custom namespace columns","Discrete Copy Number Data","Expression Data","Formats","Fusion Data(DEPRECATED)","Gene Panel Data","Gene Set Data","Generic Assay","GISTIC 2.0 Data","Introduction","Methylation Data","Mutation Data","Mutational Signature Data","Mutsig Data","Protein level Data","Resource Data","Segmented Data","Structural Variant Data","Study Tags file","Timeline Data"]},{"l":"Introduction","p":["This page describes the file formats that cancer study data should assume in order to be successfully imported into the database. Unless otherwise noted, all data files are in tabular-TSV (tab separated value) format and have an associated metadata file which is in a multiline record format. The metadata and data files should follow a few rules documented at the Data Loading page."]},{"l":"Formats"},{"l":"Cancer Study","p":["As described in the Data Loading tool page, the following file is needed to describe the cancer study:"]},{"l":"Meta file","p":["This file contains metadata about the cancer study. The file contains the following fields:","type_of_cancer: The cancer type abbreviation, e.g., \"brca\". This should be the same cancer type as specified in the meta_cancer_type.txt file, if available. The type can be \"mixed\" for studies with multiple cancer types.","cancer_study_identifier: A string used to uniquely identify this cancer study within the database, e.g., \"brca_joneslab_2013\".","name: The name of the cancer study, e.g., \"Breast Cancer (Jones Lab 2013)\".","description: A description of the cancer study, e.g., \"Comprehensive profiling of 103 breast cancer samples. Generated by the Jones Lab 2013\". 
This description may contain one or more URLs to relevant information.","citation (Optional): A relevant citation, e.g., \"TCGA, Nature 2012\".","pmid (Optional): One or more relevant pubmed ids (comma separated without whitespace). If used, the field citation has to be filled, too.","groups (Optional): When using an authenticating cBioPortal, lists the user-groups that are allowed access to this study. Multiple groups are separated with a semicolon \";\". The study will be invisible to users not in at least one of the listed groups, as if it wasn't loaded at all. e.g., \"PUBLIC;GDAC;SU2C-PI3K\". see User-Authorization for more information on groups","add_global_case_list (Optional): set to 'true' if you would like the \"All samples\" case list to be generated automatically for you. See also Case lists.","tags_file (Optional): the file name containing custom study tags for the study tags.","reference_genome (Optional): the study reference genome (e.g. hg19, hg38). Without specifying this property, the study will be assigned to the reference genome specified in portal.properties(property ucsc.build)."]},{"l":"Example","p":["An example meta_study.txt file would be:"]},{"l":"Cancer Type","p":["If the type_of_cancer specified in the meta_study.txt does not yet exist in the type_of_cancer database table, a meta_cancer_type.txt file is also mandatory."]},{"i":"meta-file-1","l":"Meta file","p":["The file is comprised of the following fields:","genetic_alteration_type: CANCER_TYPE","datatype: CANCER_TYPE","data_filename: your datafile"]},{"i":"example-1","l":"Example","p":["An example meta_cancer_type.txt file would be:"]},{"l":"Data file","p":["The file is comprised of the following columns in the order specified:","type_of_cancer: The cancer type abbreviation, e.g., \"brca\".","name: The name of the cancer type, e.g., \"Breast Invasive Carcinoma\".","dedicated_color: CSS color name of the color associated with this cancer study, e.g., \"HotPink\". 
See this list for supported names, and follow the awareness ribbons color schema. This color is associated with the cancer study on various web pages within the cBioPortal.","parent_type_of_cancer: The type_of_cancer field of the cancer type of which this is a subtype, e.g., \"Breast\". ℹ️ : you can set parent to tissue, which is the reserved word to place the given cancer type at \"root\" level in the \"studies oncotree\" that will be generated in the homepage (aka query page) of the portal."]},{"i":"example-2","l":"Example","p":["An example record would be:"]},{"l":"Clinical Data","p":["The clinical data is used to capture both clinical attributes and the mapping between patient and sample ids. The software supports multiple samples per patient.","As of March 2016, the clinical file is split into a patient clinical file and a sample clinical file. The sample file is required, whereas the patient file is optional. cBioPortal has specific functionality for a core set of patient and sample columns, but can also display custom columns (see section \"Custom columns in clinical data\")."]},{"l":"Meta files","p":["The two clinical metadata files (or just one metadata file if you choose to leave the patient file out) have to contain the following fields:","cancer_study_identifier: same value specified in meta_study.txt","genetic_alteration_type: CLINICAL","datatype: PATIENT_ATTRIBUTES or SAMPLE_ATTRIBUTES","data_filename: your datafile"]},{"l":"Examples","p":["An example metadata file, e.g. named meta_clinical_sample.txt, would be:","An example metadata file, e.g. named meta_clinical_patient.txt, would be:"]},{"l":"Data files","p":["For both patients and samples, the clinical data file is a two dimensional matrix with multiple clinical attributes. 
When the attributes are defined in the patient file they are considered to be patient attributes; when they are defined in the sample file they are considered to be sample attributes.","The first four rows of the clinical data file contain tab-delimited metadata about the clinical attributes. These rows have to start with a '#' symbol. Each of these four rows contain different type of information regarding each of the attributes that are defined in the fifth row:","Row 1: The attribute Display Names: The display name for each clinical attribute","Row 2: The attribute Descriptions: Long(er) description of each clinical attribute","Row 3: The attribute Datatype: The datatype of each clinical attribute (must be one of: STRING, NUMBER, BOOLEAN)","Row 4: The attribute Priority: A number which indicates the importance of each attribute. In the future, higher priority attributes will appear in more prominent places than lower priority ones on relevant pages (such as the Study View). A higher number indicates a higher priority.","Please note: Priority is not the sole factor determining which chart will be displayed first. A layout algorithm in study view also makes a minor adjustment on the layout. The algorithm tries to fit all charts into a 2 by 2 matrix (Mutated Genes Table occupies 2 by 2 space). When a chart can not be fitted in the first matrix, the second matrixed will be generated. And the second matrix will have lower priority than the first one. 
If a later chart can fit into the first matrix, then its priority will be promoted.","Please see here for more detailed information about how the study view utilizes priority and how the layout is calculated based on priority.","Row 5: The attribute name for the database: This name should be in upper case.","Row 6: This is the first row that contains actual data."]},{"l":"Example clinical header","p":["Below is an example of the first 4 rows with the respective metadata for the attributes defined in the 5th row."]},{"l":"Clinical patient columns","p":["AGE: Age at which the condition or disease was first diagnosed, in years (number)","Custom attributes:","Custom Clinical Attribute Headers: Any other custom attribute can be added as well. See section \"Custom columns in clinical data\".","DFS_MONTHS: Disease free (months) since initial treatment","DFS_STATUS: Disease free status since initial treatment","GENDER or SEX: Gender or sex of the patient (string)","In the patient view, 0:DiseaseFree creates a green label, 1:Recurred/Progressed a red label.","In the patient view, 0:LIVING creates a green label, 1:DECEASED a red label.","Note on survival plots: to generate the survival plots successfully, the columns are required to be in pairs, which means the file should have a pair of columns that have the same prefix but ending with _STATUS and _MONTHS individually. For example, PFS_STATUS and PFS_MONTHS are a valid pair of columns that can generate the survival plots.","Note on survival status value: the value of survival status must be prefixed with 0: or 1:. Value with prefix 0: means that no event occurred (e.g. LIVING, DiseaseFree). Value with prefix 1: means that an event occurred (e.g. DECEASED, Recurred/Progressed).","OS_MONTHS: Overall survival in months since initial diagnosis","OS_STATUS: Overall patient survival status","PATIENT_DISPLAY_NAME: Patient display name (string)","PATIENT_ID (required): a unique patient ID. 
This field allows only numbers, letters, points, underscores and hyphens.","Possible values: 0:DiseaseFree, 1:Recurred/Progressed","Possible values: 1:DECEASED, 0:LIVING","The file containing the patient attributes has one required column:","The following columns are used by the study view as well as the patient view. In the study view they are used to create the survival plots. In the patient view they are used to add information to the header.","These columns, when provided, add additional information to the patient description in the header:","TUMOR_SITE"]},{"l":"Example patient data file"},{"l":"Clinical sample columns","p":["By adding PATIENT_ID here, cBioPortal will map the given sample to this patient. This enables one to associate multiple samples to one patient. For example, a single patient may have had multiple biopsies, each of which has been genomically profiled. See this example for a patient with multiple samples.","CANCER_TYPE_DETAILED: Cancer Type Detailed, a sub-type of the specified CANCER_TYPE","CANCER_TYPE: Cancer Type","Custom attributes:","Custom Clinical Attribute Headers: Any other custom attribute can be added as well. See section \"Custom columns in clinical data\".","If set to metastatic or metastasis: red","If set to primary or otherwise: black","If set to recurrence, recurred, progression or progressed: orange","METASTATIC_SITE or PRIMARY_SITE: Override TUMOR_SITE (patient level attribute) depending on sample type","OTHER_SAMPLE_ID: OTHER_SAMPLE_ID is no longer supported. Please replace this column header with SAMPLE_ID.","PATIENT_ID (required): A patient ID. This field can only contain numbers, letters, points, underscores and hyphens.","SAMPLE_CLASS","SAMPLE_DISPLAY_NAME: displayed in addition to the ID","SAMPLE_ID (required): A sample ID. 
This field can only contain numbers, letters, points, underscores and hyphens.","SAMPLE_TYPE, TUMOR_TISSUE_SITE or TUMOR_TYPE: gives sample icon in the timeline a color.","The file containing the sample attributes has two required columns:","The following columns additionally affect the Timeline data visualization:","The following columns affect the header of the patient view by adding text to the samples in the header:","The following columns are required for the pan-cancer summary statistics tab ( example)."]},{"l":"Example sample data file"},{"l":"Columns with specific functionality","p":["These columns can be in either the patient or sample file.","CANCER_TYPE: Overrides study wide cancer type","CANCER_TYPE_DETAILED","KNOWN_MOLECULAR_CLASSIFIER","GLEASON_SCORE: Radical prostatectomy Gleason score for prostate cancer","HISTOLOGY","TUMOR_STAGE_2009","TUMOR_GRADE","ETS_RAF_SPINK1_STATUS","TMPRSS2_ERG_FUSION_STATUS","ERG_FUSION_ACGH","SERUM_PSA","DRIVER_MUTATIONS"]},{"l":"Custom columns in clinical data","p":["cBioPortal supports custom columns with clinical data in either the patient or sample file. They should follow the previously described 5-row header format. Be sure to provide the correct Datatype, for optimal search, sorting, filtering (in clinical data tab) and visualization.","The Clinical Data Dictionary from MSKCC is used to normalize clinical data, and should be followed to make the clinical data comparable between studies. This dictionary provides a definition whether an attribute should be defined on the patient or sample level, as well as provides a name, description and datatype. The data curator can choose to ignore these proposed definitions, but not following this dictionary might make comparing data between studies more difficult. It should however not break any cBioPortal functionality. 
See GET /api/ at https://oncotree.mskcc.org/cdd/swagger-ui.html#/ for the data dictionary of all known clinical attributes."]},{"l":"Banned column names","p":["MUTATION_COUNT and FRACTION_GENOME_ALTERED are auto populated clinical attributes, and should therefore not be present in clinical data files."]},{"l":"Discrete Copy Number Data","p":["The discrete copy number data file contain values that would be derived from copy-number analysis algorithms like GISTIC 2.0 or RAE. GISTIC 2.0 can be installed or run online using the GISTIC 2.0 module on GenePattern. For some help on using GISTIC 2.0, check the Data Loading: Tips and Best Practices page. When loading case list data, the _cna case list is required. See the case list section."]},{"l":"Wide vs Long format","p":["For CNA data two formats are supported: the wide, and the long format:","Wide format: a matrix, where each row is a gene, and each column is a sample","Long format: not a matrix, each row is a gene-sample combination; this makes the file longer"]},{"l":"Wide format"},{"i":"meta-file-2","l":"Meta file","p":["The meta file is comprised of the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: DISCRETE","stable_id: gistic, cna, cna_rae or cna_consensus","show_profile_in_analysis_tab: true","profile_name: A name for the discrete copy number data, e.g., \"Putative copy-number alterations from GISTIC\"","profile_description: A description of the copy number data, e.g., \"Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.\"","data_filename: your datafile","gene_panel (Optional): gene panel stable id","pd_annotations_filename (Optional): name of custom driver annotations file"]},{"i":"example-3","l":"Example","p":["An example metadata file could be named meta_cna.txt and its contents could be:"]},{"i":"data-file-1","l":"Data file","p":["For each gene (row) in the data file, the following columns are required in the order specified:","One or both of:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: An Entrez Gene identifier.","And:","An additional column for each sample in the dataset using the sample id as the column header.","For each gene-sample combination, a copy number level is specified:","\"-2\" is a deep loss, possibly a homozygous deletion","\"-1\" is a single-copy loss (heterozygous deletion)","\"0\" is diploid","\"1\" indicates a low-level gain","\"2\" is a high-level amplification."]},{"i":"example-4","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Long format"},{"i":"meta-file-3","l":"Meta file","p":["The meta file of long format is comprised of the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: DISCRETE_LONG Note: It will end up as datatype DISCRETE in the database, because the LONG data format is only relevant while importing.","stable_id: gistic, cna, cna_rae or cna_consensus","show_profile_in_analysis_tab: true","profile_name: A name for the discrete copy number data, e.g., \"Putative copy-number alterations from GISTIC\"","profile_description: A description of the copy number data, e.g., \"Putative copy-number from GISTIC 2.0. 
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.\"","data_filename: your datafile","gene_panel (Optional): gene panel stable id","namespaces (Optional): Comma-delimited list of namespaces to import."]},{"i":"example-5","l":"Example","p":["An example metadata file could be named meta_cna.txt and its contents could be:"]},{"i":"data-file-2","l":"Data file","p":["Each row contains a gene-sample combination. Custom driver annotations are added as columns to the data file, just like custom namespace columns."]},{"i":"example-6","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Adding your own discrete copy number columns","p":["Additional columns can be added to the discrete copy number long data file. In this way, the portal will parse and store your own CNA fields in the database.","See Custom namespace columns for more information on adding custom columns to data files."]},{"l":"Custom driver annotations file","p":["Custom driver annotations can be defined for discrete copy number data. These annotations can be used to complement or replace default driver annotation resources OncoKB and HotSpots. Custom driver annotations can be placed in a separate file that is referenced by the pd_annotations_filename field of the meta file. The annotation file can hold the following columns:","Hugo_Symbol (Optional): A HUGO gene symbol. Required when column Entrez_Gene_Id is not present.","Entrez_Gene_Id (Optional): An Entrez Gene identifier. Required when column Hugo_Symbol is not present.","SAMPLE_ID: A sample ID. This field can only contain numbers, letters, points, underscores and hyphens.","cbp_driver (Optional): \"Putative_Driver\", \"Putative_Passenger\", \"Unknown\", \"NA\" or \"\" (empty value). 
This field must be present if the cbp_driver_annotation is also present in the MAF file.","cbp_driver_annotation (Optional): Description field for the cbp_driver value (limited to 80 characters). This field must be present if the cbp_driver is also present in the MAF file. This field is free text. Example values for this field are: \"Pathogenic\" or \"VUS\".","cbp_driver_tiers (Optional): Free label/category that marks the mutation as a putative driver such as \"Driver\", \"Highly actionable\", \"Potential drug target\". . This field must be present if the cbp_driver_tiers_annotation is also present in the MAF file. In the OncoPrint view's Mutation Color dropdown menu, these tiers are ordered alphabetically. This field is free text and limited to 20 characters. For mutations without a custom annotation, leave the field blank or type \"NA\".","cbp_driver_tiers_annotation (Optional): Description field for the cbp_driver_tiers value (limited to 80 characters). This field must be present if the cbp_driver_tiers is also present in the MAF file. This field can not be present when the cbp_driver_tiers field is not present.","All genes referenced in the custom driver annotation file must be present in the data file for discrete copy number alterations.","The cbp_driver column flags the mutation as either driver or passenger. In cBioPortal, passenger mutations are also known as variants of unknown significance (VUS). The cbp_driver_tiers column assigns an annotation tier to the mutation, such as \"Driver\", \"Highly actionable\" or \"Potential drug target\". When a tier is selected, mutations with that annotation are highlighted as driver. Both types of custom annotations contain a second column with the suffix _annotation, to add a description. This is displayed in the tooltip that appears when hovering over the sample's custom annotation icon in the OncoPrint view.","You can learn more about configuring these annotations in the portal.properties documentation. 
When properly configured, the customized annotations appear in the \"Mutation Color\" menu of the OncoPrint view: screenshot mutation color menu"]},{"i":"example-7","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"i":"gistic-20-format","l":"GISTIC 2.0 Format","p":["GISTIC 2.0 outputs a tabular file similarly formatted to the cBioPortal format, called prefix_all_thresholded.by_genes.txt. In this file the gene symbol is found in the Gene Symbol column, while Entrez gene IDs are in the Gene ID or Locus ID column. Please rename Gene Symbol to Hugo_Symbol and Gene ID or Locus ID to Entrez_Gene_Id. The Cytoband column can be kept in the table, but note that these values are ignored in cBioPortal. cBioPortal uses cytoband annotations from the map_location column in NCBI's Homo_sapiens.gene_info.gz when loading genes into the seed database."]},{"l":"Continuous Copy Number Data"},{"i":"meta-file-4","l":"Meta file","p":["The continuous copy number metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION.","datatype: CONTINUOUS","stable_id: linear_CNA","show_profile_in_analysis_tab: false.","profile_name: A name for the copy number data, e.g., \"copy-number values\".","profile_description: A description of the copy number data, e.g., \"copy-number values for each gene (from Affymetrix SNP6).\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id","cBioPortal also supports log2 copy number data. If your data is in log2, change the following fields:","datatype: LOG2-VALUE","stable_id: log2CNA"]},{"i":"example-8","l":"Example","p":["An example metadata file, e.g. meta_log2_cna.txt, would be:"]},{"i":"data-file-3","l":"Data file","p":["The log2 copy number data file follows the same format as expression data files. 
See Expression Data for a description of the expression data file format."]},{"i":"gistic-20-format-1","l":"GISTIC 2.0 Format","p":["GISTIC 2.0 outputs a tabular file similarly formatted to the cBioPortal format, called prefix_all_data_by_genes.txt. In this file the gene symbol is found in the Gene Symbol column, while Entrez gene IDs are in the Gene ID or Locus ID column. Please rename Gene Symbol to Hugo_Symbol and Gene ID or Locus ID to Entrez_Gene_Id. The Cytoband column can be kept in the table, but note that these values are ignored in cBioPortal. cBioPortal uses cytoband annotations from the map_location column in NCBI's Homo_sapiens.gene_info.gz when loading genes into the seed database."]},{"l":"Segmented Data","p":["A SEG file (segmented data; .seg or .cbs) is a tab-delimited text file that lists loci and associated numeric values. The segmented data file format is the output of the Circular Binary Segmentation algorithm (Olshen et al., 2004). This Segment data enables the 'CNA' lane in the Genomic overview of the Patient view (as can be seen in this example)."]},{"i":"meta-file-5","l":"Meta file","p":["The segmented metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: COPY_NUMBER_ALTERATION","datatype: SEG","reference_genome_id: Reference genome version. Supported values: \"hg19\"","description: A description of the segmented data, e.g., \"Segment data for the XYZ cancer study.\".","data_filename: your datafile"]},{"i":"example-9","l":"Example:","p":["An example metadata file, e.g. meta_cna_hg19_seg.txt, would be:"]},{"i":"data-file-4","l":"Data file","p":["The first row contains column headings and each subsequent row contains a locus and an associated numeric value. 
See also the Broad IGV page on this format."]},{"i":"example-10","l":"Example:","p":["An example data file which includes the required column header would look like:"]},{"l":"Expression Data","p":["An expression data file is a two dimensional matrix with a gene per row and a sample per column. For each gene-sample pair, a real number represents the gene expression in that sample."]},{"i":"meta-file-6","l":"Meta file","p":["The expression metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MRNA_EXPRESSION","datatype: CONTINUOUS, DISCRETE or Z-SCORE","stable_id: see table below.","source_stable_id: Required when both conditions are true: (1) datatype= Z-SCORE and (2) this study contains GSVA data. Should contain stable_id of the expression file for which this Z-SCORE file is the statistic.","show_profile_in_analysis_tab: false (you can set to true if Z-SCORE to enable it in the oncoprint, for example).","profile_name: A name for the expression data, e.g., \"mRNA expression (microarray)\".","profile_description: A description of the expression data, e.g., \"Expression levels (Agilent microarray).\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"l":"Supported stable_id values for MRNA_EXPRESSION","p":["For historical reasons, cBioPortal expects the stable_id to be one of those listed in the following static set. The stable_id for continuous RNA-seq data has two options: rna_seq_mrna or rna_seq_v2_mrna. These options were added to distinguish between two different TCGA pipelines, which perform different types of normalization (RPKM and RSEM). However, for custom datasets either one of these stable_id can be chosen."]},{"i":"example-11","l":"Example","p":["An example metadata, e.g. 
meta_expression.txt file would be:"]},{"i":"data-file-5","l":"Data file","p":["For each gene (row) in the data file, the following columns are required in the order specified:","One or both of:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: A Entrez Gene identifier.","And:","An additional column for each sample in the dataset using the sample id as the column header.","For each gene-sample combination, a value is specified:","A real number for each sample id (column) in the dataset, representing the expression value for the gene in the respective sample.","or NA for when the expression value for the gene in the respective sample could not (or was not) be measured (or detected)."]},{"l":"z-score instructions","p":["For mRNA expression data, we typically expect the relative expression of an individual gene and tumor to the gene's expression distribution in a reference population. That reference population is either all tumors that are diploid for the gene in question, or, when available, normal adjacent tissue. The returned value indicates the number of standard deviations away from the mean of expression in the reference population (Z-score). This measure is useful to determine whether a gene is up- or down-regulated relative to the normal samples or all other tumor samples. Note, the importer tool can create normalized (z-score) expression data on your behalf. Please visit the Z-Score normalization script wiki page for more information. 
A corresponding z-score metadata file would be something like:"]},{"i":"examples-of-data-files","l":"Examples of data files:","p":["An example data file which includes the required column header and leaves out Hugo_Symbol (recommended) would look like:","An example data file which includes both Hugo_Symbol and Entrez_Gene_Id would look like (supported, but not recommended as it increases the chances of errors regarding ambiguous gene symbols):","An example data file with only Hugo_Symbol column (supported, but not recommended as it increases the chances of errors regarding ambiguous gene symbols):"]},{"l":"Mutation Data","p":["When loading mutation data, the _sequenced case list is required. See the case list section."]},{"i":"meta-file-7","l":"Meta file","p":["The mutation metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MUTATION_EXTENDED","datatype: MAF","stable_id: mutations","show_profile_in_analysis_tab: true","profile_name: A name for the mutation data, e.g., \"Mutations\".","profile_description: A description of the mutation data, e.g., \"Mutation data from whole exome sequencing.\".","data_filename: your data file","gene_panel (optional): gene panel stable id. See Gene panels for mutation data.","swissprot_identifier (optional): accession or name, indicating the type of identifier in the SWISSPROT column","variant_classification_filter (optional): List of Variant_Classifications values to be filtered out.","namespaces (optional): Comma-delimited list of namespaces to import."]},{"l":"Gene panels for mutation data","p":["Using the gene_panel property it is possible to annotate all samples in the MAF file as being profiled on the same specified gene panel.","Please use the Gene Panel Matrix file when:","Data contains samples that are profiled but no mutations are called. 
Also please add these to the _sequenced case list.","Multiple gene panels are used to profile the samples in the MAF file."]},{"l":"Variant classification filter","p":["The variant_classification_filter field can be used to filter out specific mutations. This field should contain a comma separated list of Variant_Classification values. By default, cBioPortal filters out Silent, Intron, IGR, 3'UTR, 5'UTR, 3'Flank and 5'Flank, except for the promoter mutations of the TERT gene. For no filtering, include this field in the metadata file, but leave it empty. For cBioPortal default filtering, do not include this field in the metadata file. Allowed values to filter out (mainly from Mutation Annotation Format page): Frame_Shift_Del, Frame_Shift_Ins, In_Frame_Del, In_Frame_Ins, Missense_Mutation, Nonsense_Mutation, Silent, Splice_Site, Translation_Start_Site, Nonstop_Mutation, 3'UTR, 3'Flank, 5'UTR, 5'Flank, IGR, Intron, RNA, Targeted_Region, De_novo_Start_InFrame, De_novo_Start_OutOfFrame, Splice_Region and Unknown"]},{"l":"Tumor seq allele ambiguity","p":["Bugs may exist in MAF data that make it ambiguous as to whether Tumor_Seq_Allele1 or Tumor_Seq_Allele2 should be seen as the variant allele to be used when a new mutation record is created and imported in cBioPortal. In such cases, preference is given to the tumor seq allele value that matches a valid nucleotide pattern ^[ATGC]*$ versus a null or empty value, or \"-\". For example, given Reference_Allele= \"G\", Tumor_Seq_Allele1= \"-\", and Tumor_Seq_Allele2= \"A\", preference will be given to Tumor_Seq_Allele2. 
Using this same example with Tumor_Seq_Allele1= \"T\", preference will be given to Tumor_Seq_Allele1 if it does not match Reference_Allele, which in this case it does not.","When curating MAF data, it is best practice to leave Tumor_Seq_Allele1 empty if this information is not provided in your data source to avoid this ambiguity."]},{"l":"Namespaces","p":["The namespaces field can be used to specify additional MAF columns for import. This field should contain a comma separated list of namespaces. Namespaces can be identified as prefixes to an arbitrary set of additional MAF columns (separated with a period e.g ASCN.total_copy_number, ASCN.minor_copy_number). All columns with a prefix matching a namespace specified in the metafile will be imported; columns with an unspecified namespace will be ignored. If no additional columns beyond the required set need to be imported, the field should be left blank."]},{"i":"example-12","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-6","l":"Data file","p":["The cBioPortal mutation data file extends the Mutation Annotation Format(MAF) created as part of The Cancer Genome Atlas (TCGA) project, by adding extra annotations to each mutation record. This section describes:","How to create the cBioPortal mutation data file with a minimal MAF file using the Genome Nexus Annotation Pipeline.","The description of the cBioPortal mutation data file. You can also get the cBioPortal mutation data file from vcf using: vcf2maf."]},{"l":"Create the cBioPortal mutation data file with Genome Nexus with a minimal MAF file"},{"l":"Minimal MAF file format","p":["A minimal mutation annotations file can contain just the five genomic change columns plus one sample identifier column. 
From this minimal MAF, it is possible to create the cBioPortal mutation data file by running it through the Genome Nexus Annotation Pipeline.","Chromosome (Required): A chromosome number, e.g., \"7\".","Start_Position (Required): Start position of event.","End_Position (Required): End position of event.","Reference_Allele (Required): The plus strand reference allele at this position.","Tumor_Seq_Allele2 (Required): Primary data genotype.","Tumor_Sample_Barcode (Required): This is the sample ID. Either a TCGA barcode (patient identifier will be extracted), or for non-TCGA data, a literal SAMPLE_ID as listed in the clinical data file.","In addition to the above columns, it is recommended to have the read counts to calculate variant allele frequencies:","t_alt_count (Optional, but recommended): Variant allele count (tumor).","t_ref_count (Optional, but recommended): Reference allele count (tumor).","The following extra annotation columns are important for making sure mutation specific UI functionality works well in the portal:","Protein_position (Optional): (annotation column) Required to initialize the 3D viewer in mutations view","SWISSPROT (Optional): (annotation column) UniProtKB/SWISS-PROT name (formerly called ID) or accession code depending on the value of the swissprot_identifier metadatum, e.g. O11H1_HUMAN or Q8NG94. Is not required, but not having it may result in inconsistent PDB structure matching in mutations view."]},{"l":"Creating the cBioPortal mutation data file","p":["Once you have a minimal MAF you can run it through the Genome Nexus Annotation Pipeline. This tool annotates variants against the Genome Nexus Server, which in turn leverages Ensembl Variant Effect Predictor (VEP) and selects a single effect per variant. 
Protein identifiers will be mapped to UniProt canonical isoforms (see also this mapping file)."]},{"l":"cBioPortal mutation data file format","p":["1 column with the amino acid change.","1 These columns are currently not shown in the Mutation tab and Patient view.","32 columns from the TCGA MAF format.","4 columns with information on reference and variant allele counts in tumor and normal samples.","BAM_File1 (Optional): Not used.","Center (Optional): The sequencing center.","Chromosome (Required): A chromosome number, e.g., \"7\".","dbSNP_RS1 (Optional): Latest dbSNP rs ID.","dbSNP_Val_Status1 (Optional): dbSNP validation status.","End_Position (Optional, but recommended for additional features such as Cancer Hotspots annotations): End position of event.","Entrez_Gene_Id (Optional, but recommended): A Entrez Gene identifier.","HGVSp_Short (Required): Amino Acid Change, e.g. p.V600E.","Hugo_Symbol (Required): A HUGO gene symbol.","Match_Norm_Seq_Allele1 (Optional): Primary data.","Match_Norm_Seq_Allele2 (Optional): Primary data.","Match_Norm_Validation_Allele11 (Optional): Secondary data from orthogonal technology.","Match_Norm_Validation_Allele21 (Optional): Secondary data from orthogonal technology.","Matched_Norm_Sample_Barcode1 (Optional): The sample ID for the matched normal sample.","Mutation_Status (Optional): \"Somatic\" or \"Germline\" are supported by the UI in Mutations tab. \"None\", \"LOH\" and \"Wildtype\" will not be loaded. Other values will be displayed as text.","n_alt_count (Optional): Variant allele count (normal).","n_ref_count (Optional): Reference allele count (normal).","NCBI_Build (Required)1: The Genome Reference Consortium Build is used by a variant calling software. 
It must be \"GRCh37\" or \"GRCh38\" for a human, and \"GRCm38\" for a mouse.","Reference_Allele (Required): The plus strand reference allele at this position.","Score1 (Optional): Not used.","Sequence_Source1 (Optional): Molecular assay type used to produce the analytes used for sequencing.","Sequencer1 (Optional): Instrument used to produce primary data.","Sequencing_Phase1 (Optional): Indicates current sequencing phase.","Start_Position (Optional, but recommended for additional features such as Cancer Hotspots annotations): Start position of event.","Strand (Optional): We assume that the mutation is reported for the + strand.","t_alt_count (Optional): Variant allele count (tumor).","t_ref_count (Optional): Reference allele count (tumor).","The cBioPortal mutation data file format recognized by the portal has:","Tumor_Sample_Barcode (Required): This is the sample ID. Either a TCGA barcode (patient identifier will be extracted), or for non-TCGA data, a literal SAMPLE_ID as listed in the clinical data file.","Tumor_Seq_Allele1 (Optional): Primary data genotype.","Tumor_Seq_Allele2 (Required): Primary data genotype.","Tumor_Validation_Allele1 (Optional): Secondary data from orthogonal technology.","Tumor_Validation_Allele2 (Optional): Secondary data from orthogonal technology.","Validation_Method1 (Optional): The assay platforms used for the validation call.","Validation_Status (Optional): Second pass results from orthogonal technology. \"Valid\", \"Invalid\", \"Untested\", \"Inconclusive\", \"Redacted\", \"Unknown\" or \"NA\".","Variant_Classification (Required): Translational effect of variant allele, e.g. Missense_Mutation, Silent, etc.","Variant_Type 1(Optional): Variant Type, e.g. SNP, DNP, etc.","Verification_Status1 (Optional): Second pass results from independent attempt using same methods as primary data source. 
\"Verified\", \"Unknown\" or \"NA\"."]},{"l":"Custom driver annotations","p":["It is possible to manually add columns for defining custom driver annotations. These annotations can be used to complement or replace default driver annotation resources OncoKB and HotSpots.","cbp_driver (Optional): \"Putative_Driver\", \"Putative_Passenger\", \"Unknown\", \"NA\" or \"\" (empty value). This field must be present if the cbp_driver_annotation is also present in the MAF file.","cbp_driver_annotation (Optional): Description field for the cbp_driver value (limited to 80 characters). This field must be present if the cbp_driver is also present in the MAF file. This field is free text. Example values for this field are: \"Pathogenic\" or \"VUS\".","cbp_driver_tiers (Optional): Free label/category that marks the mutation as a putative driver such as \"Driver\", \"Highly actionable\", \"Potential drug target\". . This field must be present if the cbp_driver_tiers_annotation is also present in the MAF file. In the OncoPrint view's Mutation Color dropdown menu, these tiers are ordered alphabetically. This field is free text and limited to 20 characters. For mutations without a custom annotation, leave the field blank or type \"NA\".","cbp_driver_tiers_annotation (Optional): Description field for the cbp_driver_tiers value (limited to 80 characters). This field must be present if the cbp_driver_tiers is also present in the MAF file. This field can not be present when the cbp_driver_tiers field is not present.","The cbp_driver column flags the mutation as either driver or passenger. In cBioPortal, passenger mutations are also known as variants of unknown significance (VUS). The cbp_driver_tiers column assigns an annotation tier to the mutation, such as \"Driver\", \"Highly actionable\" or \"Potential drug target\". When a tier is selected, mutations with that annotation are highlighted as driver. 
Both types of custom annotations contain a second column with the suffix _annotation, to add a description. This is displayed in the tooltip that appears when hovering over the sample's custom annotation icon in the OncoPrint view.","You can learn more about configuring these annotations in the portal.properties documentation. When properly configured, the customized annotations appear in the \"Mutation Color\" menu of the OncoPrint view: screenshot mutation color menu"]},{"l":"Adding your own mutation annotation columns","p":["Additional mutation annotation columns can be added to the cBioPortal mutation data file. In this way, the portal will parse and store your own MAF fields in the database. For example, mutation data that you find on cBioPortal.org comes from MAF files that have been further enriched with information from mutationassessor.org, which leads to a \"Mutation Assessor\" column in the mutation table.","See Custom namespace columns for more information on adding custom columns to data files."]},{"i":"allele-specific-copy-number-ascn-annotations","l":"Allele specific copy number (ASCN) annotations","p":["Allele specific copy number (ASCN) annotation is also supported and may be added using namespaces, described here. If ASCN data is present in the cBioPortal mutation data file, the deployed cBioPortal instance will display additional columns in the mutation table showing ASCN data.","The ASCN columns below are optional by default. 
If ascn is a defined namespace in meta_mutations_extended.txt, then these columns are ALL required.","ASCN.ASCN_METHOD (Optional): Method used to obtain ASCN data e.g. \"FACETS\".","ASCN.CCF_EXPECTED_COPIES (Optional): Cancer-cell fraction if mutation exists on major allele.","ASCN.CCF_EXPECTED_COPIES_UPPER (Optional): Upper error for CCF estimate.","ASCN.EXPECTED_ALT_COPIES (Optional): Estimated number of copies harboring mutant allele.","ASCN.CLONAL (Optional): \"Clonal\", \"Subclonal\", or \"Indeterminate\".","ASCN.TOTAL_COPY_NUMBER (Optional): Total copy number of the gene.","ASCN.MINOR_COPY_NUMBER (Optional): Copy number of the minor allele.","ASCN.ASCN_INTEGER_COPY_NUMER (Optional): Absolute integer copy-number estimate."]},{"l":"Example cBioPortal mutation data file","p":["An example cBioPortal mutation data file can be found in the cBioPortal test study study_es_0."]},{"l":"Filtered mutations","p":["A special case for Entrez_Gene_Id=0 and Hugo_Symbol=Unknown: when this combination is given, the record is parsed in the same way as Variant_Classification=IGR and therefore filtered out."]},{"l":"Methylation Data","p":["The Portal expects a single value for each gene in each sample, usually a beta-value from the Infinium methylation array platform."]},{"i":"meta-file-8","l":"Meta file","p":["The methylation metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: METHYLATION","datatype: CONTINUOUS","stable_id: \"methylation_hm27\" or \"methylation_hm450\" (depending on platform).","show_profile_in_analysis_tab: false","profile_name: A name for the methylation data, e.g., \"Methylation (HM27)\".","profile_description: A description of the methylation data, e.g., \"Methylation beta-values (HM27 platform). 
For genes with multiple methylation probes, the probe least correlated with expression is selected.\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"i":"example-13","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-7","l":"Data file","p":["The methylation data file follows the same format as expression data files. See Expression Data for a description of the expression data file format. The Portal expects a single value for each gene in each sample, usually a beta-value from the Infinium methylation array platform."]},{"l":"Protein level Data","p":["Protein expression measured by reverse-phase protein array or mass spectrometry. Antibody-sample pairs, with a real number representing the protein level for that sample."]},{"i":"meta-file-9","l":"Meta file","p":["The protein level metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: PROTEIN_LEVEL","datatype: LOG2-VALUE or Z-SCORE","stable_id: rppa, rppa_Zscores, protein_quantification or protein_quantification_zscores","show_profile_in_analysis_tab: false ( true for Z-SCORE datatype)","profile_name: A name for the RPPA data, e.g., \"RPPA data\".","profile_description: A description of the RPPA data, e.g., \"RPPA levels.\".","data_filename: your datafile","gene_panel (Optional): gene panel stable id","An example metadata file would be:","NB: You also need a Z-SCORE file if you want protein levels to be available in query UI and in Oncoprint visualization. E.g.:"]},{"i":"data-file-8","l":"Data file","p":["A protein level data file is a two dimensional matrix with a RPPA antibody per row and a sample per column. For each antibody-sample pair, a real number represents the protein level for that sample. 
The antibody information can contain one or more HUGO gene symbols and/or entrez gene identifiers, separated by a space, and an antibody ID pair separated by the \"|\" symbol."]},{"i":"example-14","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Structural Variant Data","p":["The cBioPortal can load all kinds of structural variant data but at the moment only a subset of them, fusions, are displayed."]},{"i":"meta-file-10","l":"Meta file","p":["The structural variant metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: STRUCTURAL_VARIANT","datatype: SV","stable_id: structural_variants","show_profile_in_analysis_tab: true.","profile_name: A name for the fusion data, e.g., \"Structural Variants\".","profile_description: A description of the structural variant data.","data_filename: your datafile (e.g. data_sv.txt)","gene_panel (Optional): gene panel stable id","An example metadata file would be:"]},{"i":"data-file-9","l":"Data file","p":["0","2","2078","21","3032067","39","39842043","3to5","3to5 or 5to3 or 5to5 or 3to3","4","41","42874110","45556","5_Prime_UTR,3_Prime_UTR,Promoter,Exon,Intron","55","63","7113","93891","A structural variant data file is a tab-delimited file with one structural variant per row. For each structural variant (row) in the data file, the following tab-delimited values are required:","Allowed Values","Annotation","Antisense fusion, Deletion within transcript: mid-exon, Duplication of 1 exon: in frame","Any comments/free text.","As defined in the clinical sample file.","Breakpoint_Type","Chromosome of Gene 1.(strongly recommended field)","Chromosome of Gene 2.","Class","Comments","Connection_Type","Deletion","Deletion,Duplication,Insertion,Inversion,Translocation.","Description of the event. For a fusion event, fill in Fusion. It is a free text.","Description of this event at site 1. 
This could be the location of 1st breakpoint in case of a fusion event","Description of this event at site 2. This could be the location of the 2nd breakpoint in case of a fusion event.","DNA_Support","Ensembl transcript ID of gene 1.","Ensembl transcript ID of gene 2.","ENST00000288319","ENST00000398585","Entrez Gene identifier of gene 1. (strongly recommended field)","Entrez Gene identifier of gene 2.","ERG","Event_Info","Example Values","Exon","Field","For an example see datahub. At a minimum Sample_Id, either Site1_Hugo_Symbol/ Site1_Entrez_Gene_Id or Site2_Hugo_Symbol/ Site2_Entrez_Gene_Id and SV_Status are required. For the structural variant tab visualization (still in development) one needs to provide those fields as well as Site1_Ensembl_Transcript_Id, Site2_Ensembl_Transcript_Id, Site1_Region and Site2_Region. Some of the other columns are shown at several other pages on the website. The Class, Annotation and Event_Info columns are shown prominently on several locations. Note: We strongly recommend all the data providers to submit genomic locations in addition to required fields for future visualization features.","Free Text","Free text description of the gene or transcript rearrangement.","Fusion detected from DNA sequence data, \"Yes\" or \"No\".","Genomic position of breakpoint of Gene 1.(strongly recommended field)","Genomic position of breakpoint of Gene 2.","GRCh37","GRCh37,GRCh38","HUGO gene symbol of gene 1. One might call this the left site(3’) as well. (strongly recommended field)","HUGO gene symbol of gene 2.One might call this the right site(5’) as well.","In_frame, Out-of-frame,Frameshift","Intron of ERG(-): 6Kb after exon 1","Intron of TMPRSS2(-): 511bp before exon 3","Length of the structural variant in number of bases.","NCBI_Build","Normal_Paired_End_Read_Count","Normal_Read_Count","Normal_Split_Read_Count","Normal_Variant_Count","Number of Site 1 region e.g. 
exon 2.(strongly recommended field)","Number of Site 2 region e.g. exon 4.","Out-of-frame","PRECISE","PRECISE or IMPRECISE which explain the resolution. Fill in PRECISE if the breakpoint resolution is known down to the exact base pair.","PRECISE/IMPRECISE","Protein fusion: out of frame (TMPRSS2-ERG)","q22.2","q22.3","RNA_Support","Sample_1","Sample_Id (Required)","Site1_Chromosome","Site1_Contig","Site1_Description","Site1_Ensembl_Transcript_Id","Site1_Entrez_Gene_Id","Site1_Hugo_Symbol","Site1_Position","Site1_Region","Site1_Region_Number","Site2_Chromosome","Site2_Contig","Site2_Description","Site2_Effect_On_Frame","Site2_Ensembl_Transcript_Id","Site2_Entrez_Gene_Id","Site2_Hugo_Symbol","Site2_Position","Site2_Region","Site2_Region_Number","SOMATIC","SOMATIC or GERMLINE","SV_Length","SV_Status (Required)","The contig of Site 1.(strongly recommended field)","The contig of Site 2.","The effect on frame reading in gene 2. Frame_Shift or InFrame,but it is a free text.","The NCBI assembly. Only one assembly per study can be used, see study metadata.","The number of paired-end reads of the normal tissue that support the call.","The number of paired-end reads of the tumor tissue that support the call. 
[Tumor Paired End Read Count is the same as “Spanning Fragments”.]","The number of reads of the normal tissue that have the variant/allele.","The number of reads of the tumor tissue that have the variant/allele.","The number of split reads of the normal tissue that support the call.","The number of split reads of the tumor tissue that support the call.[Tumor Split Read Count is the same as “Junction Reads”.]","The total number of reads of the normal tissue.","The total number of reads of the tumor tissue.","TMPRSS2","TMPRSS2 (NM_001135099) - ERG (NM_001243428) fusion (TMPRSS2 exons 1-2 fused with ERG exons 4-11):(c.126+879:TMRPSS2_c.40-63033:ERGdel)","Tumor_Paired_End_Read_Count","Tumor_Read_Count","Tumor_Split_Read_Count","Tumor_Variant_Count","We advise using one of these {5_PRIME_UTR,3_PRIME_UTR,PROMOTER,EXON,INTRON},but it is a free text.","We advise using one of these {5_Prime_UTR,3_Prime_UTR,Promoter,Exon,Intron},but it is a free text. (strongly recommended field)","We advise using one of these terms [DELETION, DUPLICATION, INSERTION, INVERSION or TRANSLOCATION], but it is free text.","Which direction the connection is made (3' to 5', 5' to 3', etc)","Yes","Yes or No"]},{"l":"Adding your own structural variant columns","p":["Additional mutation annotation columns can be added to the structural variant data file. 
In this way, the portal will parse and store your own structural variant fields in the database.","See Custom namespace columns for more information on adding custom columns to data files."]},{"l":"Fusion Data","p":["⚠️ DEPRECATED Use the: SV format instead"]},{"i":"meta-file-11","l":"Meta file","p":["The fusion metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: FUSION","datatype: FUSION","stable_id: fusion","show_profile_in_analysis_tab: true.","profile_name: A name for the fusion data, e.g., \"Fusions\".","profile_description: A description of the fusion data.","data_filename: your datafile","gene_panel (Optional): gene panel stable id"]},{"i":"example-15","l":"Example","p":["An example metadata file would be:"]},{"i":"data-file-10","l":"Data file","p":["A fusion data file is a two dimensional matrix with one gene per row. For each gene (row) in the data file, the following tab-delimited values are required in the order specified:","Hugo_Symbol: A HUGO gene symbol.","Entrez_Gene_Id: An Entrez Gene identifier.","Center: The sequencing center.","Tumor_Sample_Barcode: This is the sample ID.","Fusion: A description of the fusion, e.g., \"TMPRSS2-ERG fusion\".","DNA_support: Fusion detected from DNA sequence data, \"yes\" or \"no\".","RNA_support: Fusion detected from RNA sequence data, \"yes\" or \"no\".","Method: Fusion detection algorithm/tool.","Frame: \"in-frame\" or \"frameshift\".","Fusion_Status (OPTIONAL): An assessment of the mutation type (i.e., \"SOMATIC\", \"GERMLINE\", \"UNKNOWN\", or empty)","Note: If a fusion event includes a gene, e.g., Hugo_Symbol or Entrez_Gene_Id, that is not profiled, the event will be filtered out during import into the database.","An example data file which includes the required column header would look like:"]},{"l":"Case Lists","p":["Case lists are used to define sample lists that can be selected on the query page. 
Some case lists have specific functionality, but it's also possible to add custom case lists. The case list files should be placed in a sub-directory called case_lists which exists alongside all the other cancer study data. The case list file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","stable_id: it must contain the cancer_study_identifier followed by an underscore. Typically, after this a relevant suffix, e.g., _custom, is added. There are some naming rules to follow if you want the case list to be selected automatically in the query UI based on the selected sample profiles. See subsection below.","case_list_name: A name for the patient list, e.g., \"All Tumors\".","case_list_description: A description of the patient list, e.g., \"All tumor samples (825 samples).\".","case_list_ids: A tab-delimited list of sample ids from the dataset.","case_list_category: Optional alternative way of linking your case list to a specific molecular profile. E.g. setting this to all_cases_with_cna_data will signal to the portal that this is the list of samples to be associated with CNA data in some of the analyses."]},{"i":"example-16","l":"Example","p":["An example case list file would be:"]},{"l":"Case list stable id suffixes","p":["In order for sample counts to propagate to the data sets widget on the home page and the table on the Data Sets page, the following case list suffixes need to be used in the stable_id property (e.g. brca_tcga_pub_sequenced). This is also needed for correct statistics in the Study view page when calculating the frequency of CNA and of mutations per gene in the respective summary tables.","Sequenced: _sequenced. When only a mutation profile is selected on the query page, this is the default case list. Also used in the Study Summary to calculate the proportion of samples with mutations.","CNA: _cna. When only a CNA profile is selected on the query page, this is the default case list. 
Also used in the Study Summary to calculate the proportion of samples with CNA.","Sequenced and CNA: _cnaseq. When a mutation and CNA genetic profile are selected on the query page, this is the default case list.","mRNA (microarray): _mrna. When only a mRNA (microarray) profile is selected on the query page, this is the default case list.","mRNA (RNA-Seq): _rna_seq_mrna. When only a mRNA (RNA-Seq) profile is selected on the query page, this is the default case list.","mRNA (RNA-SeqV2): _rna_seq_v2_mrna. When only a mRNA (RNA-SeqV2) profile is selected on the query page, this is the default case list.","mRNA normal: _normal_mrna. Used for the datasets page to calculate the number of normal samples.","microRNA: _microrna. Used for the datasets page to calculate the number of microRNA samples.","Methylation (HM27): _methylation_hm27.","RPPA: _rppa. When only a RPPA profile is selected on the query page, this is the default case list.","Sequenced, CNA and mRNA: _3way_complete. When a mutation, CNA and mRNA profile are selected on the query page, this is the default case list.","SV: _sv. When a structural variant profile is selected on the query page, this is the default case list. Also used in the Study Summary to calculate the proportion of samples with fusions.","All: _all. If you are not using add_global_case_list attribute in Study metadata, you need to add this case list."]},{"l":"Required case lists","p":["Some case lists are required:","_all. This can be generated by the importer if you set the attribute add_global_case_list to true in the Study metadata.","_sequenced. This case list is required when loading mutation data.","_cna. 
This case list is required when loading discrete cna data."]},{"l":"Case list categories","p":["These are the valid case lists categories for case_list_category: in the meta file.","all_cases_in_study","all_cases_with_mutation_data","all_cases_with_cna_data","all_cases_with_log2_cna_data","all_cases_with_methylation_data","all_cases_with_mrna_array_data","all_cases_with_mrna_rnaseq_data","all_cases_with_rppa_data","all_cases_with_microrna_data","all_cases_with_mutation_and_cna_data","all_cases_with_mutation_and_cna_and_mrna_data","all_cases_with_gsva_data","all_cases_with_sv_data","other"]},{"l":"Timeline Data","p":["The timeline data is a representation of the various events that occur during the course of treatment for a patient from initial diagnosis. In cBioPortal timeline data is represented as one or more tracks in the patient view. Each main track is based on an event type, such as \"Specimen\", \"Imaging\", \"Lab_test\", etc.","Attention: some clinical attributes affect the timeline visualization. Please check the Clinical Data section for more information.","This type data is not yet being validated. It can, however, be uploaded."]},{"i":"meta-file-12","l":"Meta file","p":["Each event type requires its own meta file. A timeline meta file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: CLINICAL","datatype: TIMELINE","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-11","l":"Data file","p":["And optional columns with special effects:","Each event type requires its own data file, which contains all the events that each patient undergoes. The data format used for timeline data is extremely flexible. There are four required columns:","EVENT_TYPE: the category of the event. You are free to define any type of event here. 
For several event types cBioPortal has column naming suggestions and for several events there are column names which have special effects. See event types for more information.","Example column:","HYPERLINK_OPEN_IN_NEWTAB: [Link text Here](https://link-url-here.org/path:blank)","link-in-timeline-data","PATIENT_ID: the patient ID from the dataset","SOME_HYPERLINK: [Link text Here](https://link-url-here.org)","SPECIMEN_REFERENCE_NUMBER: SPECIMEN_REFERENCE_NUMBER is no longer supported. Please replace this column header with SAMPLE_ID.","START_DATE: the start point of any event, calculated in * days from the date of diagnosis (which will act as point zero on the timeline scale)","STOP_DATE: The end date of the event is calculated in days from the date of diagnosis (which will act as point zero on the timeline scale). If the event occurs over time (e.g. a Treatment, ...) the STOP_DATE column should have values. If the event occurs at a time point (e.g. a Lab_test, Imaging, ...) the STOP_DATE is still mandatory, but the values should be blanks.","STYLE_COLOR: when this column has a hexadecimal color value (e.g. #ffffff), it will be used as the color for rendering this event.","STYLE_SHAPE: when this column has a valid value, this event will be rendered using that shape. The valid shapes are circle, square, triangle, diamond, star, and camera.","The external link can be opened in a new tab, instead of an IFRAME within the same window/tab. 
To do this, the string :blank is to be added as a suffix at the end of the URL.","To embed hyperlinks in custom columns:","Values in custom columns can include markdown for hyperlinks that will show up in event tooltips, allowing users to click through to external resources associated with events."]},{"l":"Event Types","p":["AGENT_CLASS: This allows you to classify your agents into useful groups.","AGENT: for medical therapies, the agent is defined with number of cycles if applicable and for radiation therapy, the agent is defined as standard dose given to the patient during the course.","As previously mentioned, the EVENT_TYPE can be anything. However, several event types have columns with special effects. Furthermore, for some event types cBioPortal has column naming suggestions.","Based on different cancer types you can add additional data here.","DIAGNOSTIC_TYPE_DETAILED: Detailed description of the event type.","DIAGNOSTIC_TYPE: This attribute will cover the different diagnostics tools used (for example: MRI, CT scan etc.)","EVENT_TYPE: IMAGING","EVENT_TYPE: LAB_TEST","EVENT_TYPE: SPECIMEN","EVENT_TYPE: STATUS","EVENT_TYPE: TREATMENT","RESULT: corresponding value of the test","RESULT: Results of the diagnostic tests","SOURCE: Where the status was monitored.","SOURCE: Where was the Imaging done.","SOURCE: Where was the specimen collection done.","Special: all dots in the IMAGING track are squares.","Special: When using the AGENT and SUBTYPE columns, each agent and subtype will be split into its own track.","Special: When using the TEST and RESULT columns, each test gets its own track. 
Any TEST that has only numerical RESULT values will be rendered as a line chart.","SPECIMEN_SITE: This is the site from where the specimen was collected.","SPECIMEN_TYPE: This can either be tissue or blood.","STATUS: If the EVENT_TYPE is status, data is entered under STATUS to define either the best response from the treatment or if there is a diagnosis of any stage progression etc.","SUBTYPE: Depending upon the TREATMENT_TYPE, this can either be Chemotherapy, Hormone Therapy, Targeted Therapy etc. (for Medical Therapies) or WPRT, IVRT etc. (for Radiation Therapies).","Suggested columns","TEST: type of test performed","TREATMENT_TYPE: This can be either Medical Therapy or Radiation Therapy."]},{"l":"Clinical Track Ordering","p":["Clinical tracks are ordered as follows (if available):","Specimen","Surgery","Status","Diagnostics","Diagnostic","Imaging","Lab_test","Treatment","First custom event","etc."]},{"i":"example-17","l":"Example","p":["An example timeline file for SPECIMEN would be:","Assuming the sample identifiers were also defined in the clinical file, this will lead to a timeline track with numbered specimen samples.","An example timeline file for Lab_test would be:","This will lead to a timeline track for Lab_test with an additional subtrack specifically for PSA. PSA's events will be sized based on the result."]},{"i":"gistic-20-data","l":"GISTIC 2.0 Data","p":["Running GISTIC 2.0 on e.g. GenePattern not only provides the Discrete Copy Number Data, but also provides an amp_genes and a del_genes file. These cannot be directly imported into cBioPortal, but first have to be converted to a different file format. 
An example can be found in the ACC TCGA study on cBioPortal Datahub.","After uploading a gistic_amp and/or gistic_del file, significantly recurrently copy-number altered genes will be labeled and available for query in the \"CNA Genes\" table on the study view, like in the TCGA Legacy / Firehose data set for bladder cancer:","recurrently-altered-genes-table"]},{"i":"meta-file-13","l":"Meta file","p":["The Gistic metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: GISTIC_GENES_AMP or GISTIC_GENES_DEL","datatype: Q-VALUE","reference_genome_id: reference genome version. Supported values: \"hg19\"","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-12","l":"Data file","p":["The following fields from the generated Gistic file are used by the cBioPortal importer:","chromosome: chromosome on which the region was found, without the chr prefix","peak_start: start coordinate of the region of maximal amplification or deletion within the significant region","peak_end: end coordinate of the region of maximal amplification or deletion within the significant region","genes_in_region: comma-separated list of HUGO gene symbols in the 'wide peak' (allowing for single-sample errors in the peak boundaries)","amp: 1 for amp, 0 for del","cytoband: cytogenetic band specification of the region, including chromosome (Giemsa stain)","q_value: the q-value of the peak region"]},{"i":"example-18","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"MutSig Data","p":["MutSig stands for \"Mutation Significance\". MutSig analyzes lists of mutations discovered in DNA sequencing, to identify genes that were mutated more often than expected by chance given background mutation processes. 
You can download mutsig from broadinstitute(MutSigCV 1.4 is available) or run mutsig (MutSigCV 1.2 is available) using GenePattern.","Note: The tcga files that are uploaded to cBioPortal are generated using MutSig2.0. This version is not available outside broadinstitute.","The MutSigCV 1.2 output is different from the MutSig2.0 header. TODO: test the 1.4 version. Requires > 10GB of memory","After uploading a MutSig file, significantly recurrently mutated genes will be labeled and available for query in the \"Mutated Genes\" table on the study view, like in the TCGA Legacy / Firehose data set for bladder cancer:","recurrently-mutated-genes-table","This type data is not yet being validated. It can, however, be uploaded."]},{"i":"meta-file-14","l":"Meta file","p":["The MutSig metadata file should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: MUTSIG","datatype: Q-VALUE","data_filename: your datafile","An example metadata file would be:"]},{"i":"data-file-13","l":"Data file","p":["The following fields from a MutSig file are used by the cBioPortal importer:","rank","gene: this is the HUGO symbol","N (or Nnon): bases covered","n (or nnon): number of mutations","p: result of testing the hypothesis that all of the observed mutations in this gene are a consequence of random background mutation processes, taking into account the list of bases that are successfully interrogated by sequencing (i.e., “covered”) and the list of observed somatic mutations, as well as the length and composition of the gene in addition to the background mutation rates in different sequence contexts ( https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3059829/)","q: p value correct for multiple testing"]},{"i":"example-19","l":"Example","p":["An example data file which includes the required column header would look like:"]},{"l":"Gene Panel Data","p":["Gene panel functionality can specify which genes are assayed on a panel and 
assign samples and genetic profiles (such as mutation data) to a panel.","To include gene panel data in your instance, the following data and/or configurations can be used:","Gene panel file: This file contains the genes on the gene panel. A panel can be used for multiple studies within the instance and should be loaded prior to loading a study with gene panel data. For information on the format and import process please visit: Import-Gene-Panels.","Gene panel matrix file: This file is used to specify which samples are sequenced on which gene panel in which genetic profile. This is recommended for mutation and structural variant data, because the MAF and structural variant formats are unable to include samples which are sequenced but contain no called mutations, and only a single gene panel can be defined in the meta file. For other genetic profiles, columns can be added to specify their gene panel, but a property can also be added to their respective meta file, because these data files contain all profiled samples. Although the gene panel matrix functionality overlaps with the case list functionality, a case list for mutations (_sequenced) and Structural variants (_sv) is also required.","Gene panel property in meta file: Adding the gene_panel: property to the meta file of data profile will assign all samples from that profile to the gene panel. In this case it is not necessary to include a column for this profile in the gene panel matrix file."]},{"l":"Gene Panel Matrix file"},{"l":"Columns and rows","p":["The gene panel matrix file contains a list of samples in the first column, and an additional column for each profile in the study using the stable_id as the column header. These stable_id's should match the ones in their respective meta files, for example mutations for mutation data and gistic for discrete CNA data. Columns should be separated by tabs. 
Fusion events are saved in the mutation table in the cBioPortal database, so they should be included in the mutations column. As described above, genetic profiles other than mutation and fusion data profiles can use the gene_panel: meta property if all samples are profiled on the same gene panel."]},{"l":"Values","p":["For each sample-profile combination, a gene panel should be specified. Please make sure this gene panel is imported before loading the study data. When the sample is not profiled on a gene panel, or if the sample is not profiled at all, use NA as value. If the sample is profiled for mutations, make sure it is also in the _sequenced case list."]},{"i":"example-20","l":"Example","p":["An example file would look like this:","SAMPLE_ID","mutations","gistic","SAMPLE_ID_1","IMPACT410","SAMPLE_ID_2","SAMPLE_ID_3","NA"]},{"i":"meta-file-15","l":"Meta file","p":["The gene panel matrix file requires a meta file, which should contain the following fields:","cancer_study_identifier: same value as specified in study meta file","genetic_alteration_type: GENE_PANEL_MATRIX","datatype: GENE_PANEL_MATRIX","data_filename: your datafile","Example:"]},{"l":"Gene panel property in meta file","p":["If all samples in a genetic profile have the same gene panel associated with them, an optional field can be specified in the meta data file of that datatype called gene_panel:. If this is present, all samples in this data file will be assigned to this gene panel for this specific profile."]},{"l":"Gene Set Data","p":["A description of importing gene sets (which are required before loading a gene set study) can be found here. This page also contains a description of how to import gene set hierarchy data, which is required to show a hierarchical tree on the query page to select gene sets.","cBioPortal supports GSVA scores and GSVA-like scores, such as ssGSEA. 
The Gene Set Variation Analysis method in R (GSVA, Hänzelmann, 2013) can calculate several types of scores (specified with the methods= argument) and outputs a score between -1 and 1. The GSVA method also calculates a p-value per score using a bootstrapping method.","To import the GSVA(-like) data, a score and p-value data file are required. It is important that the dimensions of the score and p-value file are the same and that they contain the same gene sets and samples. Both data files require a meta file."]},{"l":"GSVA score meta file","p":["The meta file will be similar to meta files of other genetic profiles, such as mRNA expression. For both GSVA and GSVA-like scores, GSVA-SCORE is used as datatype and gsva_scores is used as stable_id.","Required fields:","Example:"]},{"l":"GSVA score data file","p":["The data file will be a simple tab separated format, similar to the expression data file: each sample is a column, each gene set a row, each cell contains the GSVA score for that sample x gene set combination.","The first column is geneset_id and contains the names of the gene sets. Gene set names should be formatted in uppercase. The other columns are sample columns: An additional column for each sample in the dataset using the sample id as the column header.","The cells contain the GSVA(-like) score: which is real number, between -1.0 and 1.0, representing the score for the gene set in the respective sample, or NA when the score for the gene set in the respective sample could not be (or was not) calculated. 
Example with 2 gene sets and 3 samples:"]},{"l":"GSVA p-value meta file","p":["For both GSVA and GSVA-like p-values, P-VALUE is used as datatype and gsva_pvalues is used as stable_id.","Required fields:","Example:"]},{"l":"GSVA p-value data file","p":["The data file will be a simple tab separated format, similar to the score file: each sample is a column, each gene set a row, each cell contains the p-value for the score found for sample x gene set combination.","The first column is geneset_id and contains the names of the gene sets. Gene set names should be formatted in uppercase. The other columns are sample columns: An additional column for each sample in the dataset using the sample id as the column header.","The cells contain the p-value for the GSVA score: A real number, between 0.0 and 1.0, representing the p-value for the GSVA score calculated for the gene set in the respective sample, or NA when the score for the gene is also NA. Example with 2 gene sets and 3 samples:"]},{"l":"Study Tags file","p":["YAML or JSON file which contains extra information about the cancer study. No compulsory fields are required for this file (free-form). To enable this feature, you need to add a line in the cancer study meta file with tags_file: followed the YAML/JSON file name. The information on the YAML or JSON file will be displayed in a table when mousing over a tag logo in the studies on the query page."]},{"l":"Generic Assay","p":["Generic Assay is a two dimensional matrix generalized to capture non-genetic measurements per sample. Instead of a gene per row and a sample per column, a Generic Assay file contains a generic entity per row and a sample per column. A generic entity is defined by the data curator and generally means something other than a gene. Some examples include, treatment response or mutational signatures. 
For each generic entity - sample pair, a (real number / text / binary value) represents a captured measurement."]},{"l":"Generic Assay meta file","p":["The generic assay metadata file should contain the following fields:","Example:"]},{"l":"Note on stable_id","p":["The stable_id for the generic assay datatype is a user defined field. The only requirement is that the stable_id is unique across all metafiles in the study."]},{"l":"Note on pivot_threshold_value","p":["The pivot_threshold_value is an arbitrary value that specifies a critical boundary that distinguish important from unimportant values in a generic assay profile. This boundary will be used in different visualizations to highlight important observations. Whether smaller or larger values are considered to be important can be controlled with the value_sort_order field (see below).","In a heatmap for a generic assay profile important values are shown in darker shades of blue, whereas unimportant values are shown in darker shades of red. The values represented by the pivot_threshold_value is shown in white. When defined, the pivot_threshold_value will always be included in the legend, even when all datapoints are all more extreme. In the Waterfall plot in Plots tab of Results view pivot_threshold_value determines the boundary between up- and downward deflections in the plot.","When no sensible idea exists for a boundary between important and unimportant observations the pivot_threshold_value field should not be defined (excluded from meta file). In this case all values will be shown as shades of blue in the heatmap and the waterfall plot will show up- and downward deflections around 0."]},{"l":"Note on value_sort_order","p":["The value_sort_order field can be used to indicate whether small or large value are considered to be more important. When value_sort_order is ASC smaller values are considered to be more important. 
When value_sort_order is DESC larger values are considered to be more important.","The default value for value_sort_order is ASC.","The value_sort_order is used by the Oncoprint when aggregating generic assay data in a tooltip that covers multiple samples from a single patient. When value_sort_order is ASC the sample with the smallest response value will be shown for that patient. When value_sort_order is DESC the sample with the largest value will be shown for that patient.","In the heatmap for a generic assay profile in results view data points with smaller values will show as darker blue when the value_sort_order is ASC. When value_sort_order is DESC larger values are assigned a darker blue color.","The value_sort_order is used by the Waterfall plot for orientation of the x-axis so that important observations are shown at the left side of the plot. When value_sort_order is ASC the x-axis will be in ascending order with smaller values to the left. When value_sort_order is DESC the x-axis will be in descending order with larger values to the left."]},{"l":"Note on generic_entity_meta_properties","p":["All meta properties must be specified in the generic_entity_meta_properties field. Every meta property listed here must appear as a column header in the corresponding data file. It's highly recommend to add NAME, DESCRIPTION and an optional URL to get the best visualization on OncoPrint tab and Plots tab."]},{"l":"Note on patient_level","p":["Generic Assay data will be considered sample_level data if the patient_level property is missing or set to false. 
In addition, the patient or sample identifiers need to be included in the Clinical Data file."]},{"l":"Note on Generic Assay genetic_alteration_type and datatype","p":["All generic assay data is registered to be of the type of genetic_alteration_type and data type can choose from LIMIT-VALUE, CATEGORICAL and BINARY.","LIMIT-VALUE: This datatype is intended to be used for any numerical data set with similar structure (entities measured in samples). The LIMIT-VALUE is validated to contain any continuous number optionally prefixed with a '>' or '<' threshold symbol (e.g., '>8.00').","CATEGORICAL (under development): This datatype is intended to be used for any categorical data set with similar structure (entities measured in samples). Any text is allowed in CATEGORICAL.","BINARY (under development): This datatype is intended to be used for any binary data set with similar structure (entities measured in samples). The BINARY is validated to contain only reserved text ( true, false, yes, no).","If the value for the generic entity in the respective sample could not (or was not) be measured (or detected), the value should be 'NA' or leave that cell blank."]},{"l":"Generic Assay data file","p":["The data file will be a simple tab separated format, similar to the expression data file: each sample is a column, each generic entity is a row, each cell contains values for that generic entity x sample combination.","For each generic entity (row) in the data file, the following columns are required in the order specified:","ENTITY_STABLE_ID: Any unique identifier using a combination of alphanumeric characters, _ and -.","And:","An additional column for each generic_entity_meta_properties in the metafile, using the property name as the column header (e.g., 'NAME').","An additional column for each sample in the dataset using the sample id as the column header.","Example with 3 generic entities and 3 samples:"]},{"l":"Arm Level CNA Data","p":["Arm-level copy-number data is a 
predefined subtype of Generic Assay Data.","Allowed values for Arm-level copy-number data are Loss, Gain, and Unchanged, use NA or leave the cell blank to indicate a missing value.","Please find example file format here: Meta file example and Data file example"]},{"l":"Mutational Signature Data","p":["Mutational Signature data is a predefined subtype of Generic Assay Data. Setting generic_assay_type: MUTATIONAL_SIGNATURE in the meta file will make cBioPortal interpret the data as Mutational Signature data."]},{"l":"Mutational Signature meta files","p":["The mutational signature meta files follow the same convention as the Generic Assay Meta file, however there are some key differences:","generic_assay_type should be set to MUTATIONAL_SIGNATURE","datatype should be set to LIMIT-VALUE","stable_id values should end with: _{filetype}_{identifier}, where:","filetype is either contribution, pvalue or counts","identifier is consistent between files belonging to the same analysis","Multiple signatures can be added to a single study, as long as they have different identifiers in their stable id (e.g., contribution_SBS and contribution_DBS)","In generic_entity_meta_properties the NAME value is required. The DESCRIPTION and URL values can be added to display more information and link to external resources in the mutational signatures tab."]},{"l":"Mutational Signature data files","p":["The mutational signature data files follow the same convention as the Generic Assay Data file. Each collection of mutational signatures can consist of up to three different data files, each with an accompanying meta file.","Signature contribution file ( required)","Data file containing the contribution of each signature-sample pair. Values are expected to be 0 ≤ x ≤ 1.","Signature pvalue file (optional)","Data file containing p-values for each signature-sample pair. 
Values below 0.05 will be shown as significant.","Mutational counts matrix file (optional)","Data file containing nucleotide changes of a sample. cBioPortal has specific visualization options for single-base substitutions (96 channels), double-base substitutions (72 channels) and insertion/deletions (83 channels), following the signatures defined by COSMIC. But other channels can also be used. Values are expected to be positive integers."]},{"l":"Resource Data","p":["The resource data is used to capture resource data in patients, samples and studies. The resources will be represented by URLs with meta data. The types of resources include:","Files: pdf, txt, png, json, etc.","Web links: non-file links e.g. URLs to other systems","the resource file is split into a resource definition file, sample resource file, patient resource file and study resource file. All data files are required to have a matching meta file."]},{"i":"meta-files-1","l":"Meta files","p":["The resource metadata files have to contain the following fields:","cancer_study_identifier: same value specified in meta_study.txt","resource_type: value from (DEFINITION / SAMPLE / PATIENT / STUDY)","data_filename: your datafile"]},{"i":"examples-1","l":"Examples","p":["An example metadata file, e.g. named meta_resource_definition.txt, would be:","An example metadata file, e.g. named meta_resource_sample.txt, would be:"]},{"i":"data-files-1","l":"Data files"},{"l":"Resource Definition Data File","p":["The resource definition file should follow this format, it has three required columns:","RESOURCE_ID (required): a unique resource ID. 
This field allows only numbers, letters, points, underscores and hyphens.","DISPLAY_NAME (required): a display name for resources.","RESOURCE_TYPE (required): resource type for resources, must be SAMPLE, PATIENT or STUDY.","DESCRIPTION (optional): a description for resources.","OPEN_BY_DEFAULT (optional): define if the resource will be open by default ( true/ false), default is false.","PRIORITY (optional): if not given, will give a default value."]},{"l":"Example Resource Definition data file"},{"l":"Sample Resource Data File","p":["The sample resource file should follow this format, it has four required columns:","PATIENT_ID (required): a unique patient ID. This field allows only numbers, letters, points, underscores and hyphens.","SAMPLE_ID (required): a unique sample ID. This field allows only numbers, letters, points, underscores and hyphens.","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Sample Resource data file"},{"l":"Patient Resource Data File","p":["The patient resource file should follow this format, it has three required columns:","PATIENT_ID (required): a unique patient ID. 
This field allows only numbers, letters, points, underscores and hyphens.","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Patient Resource data file"},{"l":"Study Resource Data File","p":["The study resource file should follow this format, it has two required columns:","RESOURCE_ID (required): a unique resource ID which should also be included in the Resource Definition data file.","URL (required): url to the resources, start with http or https."]},{"l":"Example Study Resource data file"},{"l":"Custom namespace columns"},{"l":"Adding annotation columns through namespaces","p":["Custom columns can be added to the data files of mutations, structural variants and discrete copy number (long) data. The columns can be imported through the namespace mechanism into a database table column called ANNOTATION_JSON. Any columns starting with a prefix specified in the namespaces field in the metafile will be imported into the database. Namespace columns should be formatted as the namespace and namespace attribute seperated with a period (e.g ASCN.total_copy_number where ASCN is the namespace and total_copy_number is the attribute).","An example cBioPortal mutation data file with the following additional columns:","imported with the following namespaces field in the metafile:","will import the ASCN.total_copy_number and ASCN.clonal column into the database. MUTATION.name and MUTATION.type will be ignored because mutation is not specified in the namespaces field."]},{"l":"Representation of namespace columns by mutation API endpoints","p":["Columns added through namespaces will be returned by relevant mutation, discrete copy number and structural variant API endpoints. Namespace data will be available in the namespaceColumn of respective JSON representations of mutation records. 
The namespaceColumns property will be a JSON object where namespace data is keyed by name of the namespace in lowercase. For instance, when namespace ZYGOSITY is defined in the meta file and the data file has column ZYGOSITY.status with value Homozygous for a mutation row, the API will return the following JSON record for this mutation (only relevant fields are shown):","Note: ASCN namespace data is not exported via the namespaceColumns field."]},{"l":"Representation of namespace columns in the cBioPortal frontend","p":["Namespace columns will be added as columns to mutation, structural variant and copy number alteration tables in Patient View and Results View. The case of the namespace in the column header will be as specified in the mutations meta file and the column name will be capitalized.","For instance, this metafile entry:","and this column header:","will show in the mutation table with column name:","Note: namespace columns are recognized by a case-insensitive match of the namespace reported in the meta file and the first word in the column header."]}],[{"l":"Z Score normalization script"},{"l":"Introduction","p":["For some data types, when uploading to cBioPortal, it is currently necessary to also provide a z-score transformed version of your input file. The z-score data is essential for the oncoprint functionality. The oncoprint shows high or low mRNA expression of the genes, based on the threshold the user sets when selecting the genomic profile.","⚠️ Please keep in mind that the z-scores are calculated using only patient data. Hence, 'mRNA High' in this case implies higher expression than the average patient. Also, the source data on which the z-score data is based does not necessarily follow the normal distribution. If your data does not follow the normal distribution, the z-score threshold is less reliable and will result in more false positives or false negatives. 
You can consider log transforming your value before calculating z-scores to improve this. However, even the logged values may not follow the normal distribution, especially if the data is bimodal."]},{"l":"The cBioPortal Z-Score calculation method","p":["cBioPortal currently generates two z-score profiles using two different base populations:","Distribution based on diploid samples only: The expression distribution for unaltered copies of the gene is estimated by calculating the mean and variance of the expression values for samples in which the gene is diploid (i.e. value is \"0\" as reported by discrete CNA data). We call this the unaltered distribution. If the gene has no diploid samples, then its normalized expression is reported as NA.","Distribution based on all samples: The expression distribution of the gene is estimated by calculating the mean and variance of all samples with expression values. If the gene has samples whose expression values are all zeros or non-numeric, then its normalized expression is reported as NA.","Otherwise for every sample, the gene's normalized expression for both the profiles is reported as","where r is the raw expression value, and mu and sigma are the mean and standard deviation of the base population, respectively."]},{"l":"How to proceed","p":["cBioPortal expects z-score normalization to take place per gene. 
You can calculate z-scores with your own preferred method, or use one of the cBioPortal provided approaches:","convertExpressionZscores.pl applies Method 1 (diploid samples as base population)","NormalizeExpressionLevels_allsampleref.py applies Method 2 (all samples as base population)","Examples of the calculation and running the programs are below."]},{"l":"convertExpressionZscores method","p":["Given expression and Copy Number Variation data for a set of samples (patients), generate normalized expression values."]},{"l":"Parameters","p":["copy_number_file expression_file output_file normal_sample_suffix [min_number_of_diploids]","copy_number_file: the discrete copy number (CNA) file","expression_file: the expression (exp) data file.","output_file: the output file to be generated","normal_sample_suffix: use this to identify which of your samples are \"normal\" samples (if any). E.g. normal TCGA samples have a suffix \"-11\". Set it to some dummy value, e.g. \"NONE\", if you have no normal samples in your data."]},{"l":"Algorithm","p":["Input: discrete copy number (CNA) and expression (exp) files"]},{"l":"Example Calculation","p":["Calculate mean and stdev where CNA is 0 (=diploid):","Calculate the z-scores:","Note: this implies that your full dataset does not have average=0, std=1"]},{"l":"Running the script","p":["To run the script type the following commands when in the folder cbioportal_source_folder/core/src/main/scripts:","and then"]},{"i":"example","l":"Example:"},{"l":"NormalizeExpressionLevels_allsampleref method","p":["Given the expression data for a set of samples, generate normalized expression values with the reference population of all samples independent of sample diploid status."]},{"i":"parameters-1","l":"Parameters","p":["expression_file output_file [log_transform] [exclude_zero_negative_values]","expression_file: the expression (exp) data file.","output_file: the output file to be generated.","log_transform: Use this to log transform the data 
before calculating z-scores (optional).","exclude_zero_negative_values: Use this to exclude zero's or negative counts from the reference population when normalizing the data (optional)."]},{"i":"algorithm-1","l":"Algorithm","p":["Input expression data file"]},{"l":"Log-transforming the data","p":["Using the -l option above calculates log base 2 of the expression values.","Here's how we handle the Negative values when log transforming:"]},{"i":"example-calculation-1","l":"Example Calculation:","p":["Log transform and calculate the z-scores (without -e option):"]},{"i":"running-the-script-1","l":"Running the script","p":["To run the script clone the datahub-study-curation-tools from here and type the commands when in the folder zscores/zscores_relative_allsamples:"]},{"i":"example-1","l":"Example:"}],[{"l":"Study Curation Guide","p":["This guide is to help data curators learn how to curate a study on their own computer"]},{"l":"Prerequisites","p":["To follow this guide the curator should have some familiarity with running commands on the command line. We will be using Docker. No Docker knowledge is required, one will obtain some basic understanding by following the guide.","Learn how to setup cBioPortal locally here first."]},{"l":"Load an example study","p":["After having followed the steps in the Docker Deployment instructions, you will end up with the study Low-Grade Gliomas (UCSF, Science 2014) loaded locally. Now let's try to import another study:","Choose another example study from the datahub. Note the name of the folder which is identical to the study id defined in meta_study.txt.","From the root of the cbioportal-docker-compose folder run DATAHUB_STUDIES=my_study_id ./study/init.sh. Change my_study_id to the study you picked in 1. The study should now be downloaded in ./study/.","Import the study by running docker-compose exec cbioportal metaImport.py -u http://cbioportal:8080 -s study/my_study_id/ -o. Again change my_study_id to the study you picked in 1. 
This should import the study.","Restart the cbioportal instance docker-compose restart cbioportal and see if the new study shows up on http://localhost:8080"]},{"l":"Curate a new study","p":["The cBioPortal team has curated many published studies in formats suitable for import in cBioPortal. These can be found on the datahub and can serve as an example of how our curation processes works. You can find a step by step description of how to curate a new study here."]}],[{"i":"data-loading-importing-without-validation-and-deleting-studies","l":"Data loading: Importing without validation and deleting studies","p":["For data curators and developers cbioportalImporter.py is available. This script can import data regardless of validation results. If data format is incorrect, the importer may stop with an error or crash, or leave the database in an inconsistent state.","This script can also be used to delete studies.","Requirements","Importing a study without validation","Deleting a study"]},{"l":"Requirements","p":["This script requires $PORTAL_HOME to point to the folder containing your cBioPortal configuration. 
This can be done with:","The script itself can be found in cbioportal_source_folder/core/src/main/scripts/importer."]},{"l":"Importing a study without validation","p":["To import a study without validation, run:","For example:"]},{"l":"Deleting a study","p":["To remove a study, run:","The meta_study.txt file should contain the study ID in cancer_study_identifier: of the study you would like to remove.","For example:","If you have the Cancer Study Id of the study, or studies you want to remove, you can also use:","Where study1_id is the Cancer Study Id of the study you would like to remove.","You can also remove multiple studies at once by passing the Cancer Study Ids separated by commas:","Where study1_id, study2_id and study3_id are the Cancer Study IDs of the studies you would like to remove."]}],[{"l":"Importing single data files for development","p":["In some cases, for example during development, it may be useful to import a single data file into an existing study. To import one data file at a time, you can use the following command. Note that this process will not validate the data.","This can be done by running cbioportalImporter.py from cbioportal_source_folder/core/src/main/scripts/importer/."]},{"l":"Requirements","p":["This script requires $PORTAL_HOME to point to the folder containing your cBioPortal configuration. This can be done with:"]},{"l":"Workflow","p":["First, if your cancer type does not yet exist, you need to create it:","Next, create the study using","The meta file has to contain the study information.","Now you can import your data file(s):","⚠️ Your first data file should always be the clinical data!","Finally, after you've imported all data, import your case lists:"]},{"i":"example","l":"Example:"}],[{"l":"Data Loading Tips and Best Practices","p":["Here we describe some Tips and Best Practices."]},{"i":"running-gistic-20","l":"Running GISTIC 2.0","p":["To generate discrete copy number data file you may need to run GISTIC 2.0. 
GISTIC 2.0 can be installed or run online using the GISTIC 2.0 module on GenePattern. Running GISTIC 2.0 requires two input files:","A segmentation file, which contains the segmented data","A marker file, which identifies the marker names and positions of the markers in the original dataset (before segmentation).","In some cases the marker file may not be available. You can create one as follows: Using your segmentation file, create a line for each start and end position. E.g. if your seg file contains","In your markerfile this becomes"]},{"l":"Effect of cBioPortal instance on validation","p":["When validating data, you can decide against which server to validate your data with the -u flag. The selected server can have a significant effect on the validation results in the following ways:","Genes may or may not be available on a specific server","Clinical data and its description may vary per server","...","It is advised to use the server where you plan to upload your data as the validation server."]}],[{"l":"Mutation data transcript annotation","p":["This document describes how each mutation in cBioPortal gets annotated with a specific gene symbol + protein change."]},{"l":"Biological Background","p":["This section explains the concepts of protein isoforms and transcripts."]},{"i":"what-is-an-isoform","l":"What is an isoform?","p":["From a single gene (string of nucleotides) multiple protein sequences can be formed (string of amino acids). For example: parts of the gene that code for proteins (exons) can be included or excluded through a process known as alternative splicing. Each of the different resulting proteins is called an isoform. A single mutation can impact the isoforms differently. E.g. in one isoform it might change a P to a T, but in the other isoform that particular exon does not get included and it is therefore not changing the amino acid sequence at all. In cBioPortal for convenience sake we assign a single gene symbol + protein change to each mutation. 
For most cases this works well because there is only one protein isoform relevant in a clinical setting. There are of course exceptions and we are therefore working on improving this representation. An explanation of the relation between transcripts and protein isoforms can be found in the next section."]},{"i":"what-is-a-transcript","l":"What is a transcript?","p":["DNA is transcribed to a pre-mRNA transcript which includes intron and exon regions. Splicing and other processes then take place to form the resulting mature mRNA transcript that only contains exons, which subsequently can be translated to a protein sequence. An mRNA transcript can thus be associated with a specific protein isoform. The Ensembl database assigns ids for these transcript with names like ENSTxxx. You can see this on e.g. the Ensembl website for the BRAF gene:","The transcript ENST00000288602.6 is 2480 base pairs long (nucleotides ACGT) and the associated protein isoform is 766 amino acids (V/P/etc). You can see we are showing that same transcript and protein isoform on cBioPortal:","For each gene name in cBioPortal a canonical/default transcript is assigned. These assignments are stored in Genome Nexus and explained below. Although cBioPortal does not store changes to different transcripts/isoforms for each mutation in the database itself, it does allow viewing them on the Mutations Tab by re-annotating the mutations on the fly through Genome Nexus whenever a user clicks on the transcript dropdown."]},{"l":"Transcript Assignment","p":["The cBioPortal database stores one gene + protein change annotation for each mutation event in the database. To allow comparing mutation data across studies it is important to annotate the mutation data (be it in MAF or VCF format) in the same way, otherwise the gene + protein changes can mean entirely different things. For all public studies stored in datahub we leverage Genome Nexus to do so. 
Genome Nexus assigns one canonical Ensembl Transcript + gene name + protein change for each mutation. You can find the mapping of hugo symbol to transcript id here. There are two sets of default transcripts: uniprot and mskcc. We recommend to use the mskcc set of transcripts when starting from scratch, since these are more up to date and correspond to transcripts that were chosen as relevant for clinical sequencing at MSKCC. The uniprot set of transcripts was constructed several years ago, but we are no longer certain about the logic on how to reconstruct them hence they are not being kept up to date. One can see the differences between the two in this file. For the public cBioPortal (https://www.cbioportal.org) and datahub we are using mskcc, for the GENIE cBioPortal ( https://genie.cbioportal.org) we still use uniprot. As of cBioPortal v5 the default is mskcc for local installations. Prior to v5 it was uniprot. We recommend that people upgrading to v5 consider migrating to mskcc as well (see migration guide and the properties reference docs)."]},{"l":"How default transcript assignment affects the Mutations Tab","p":["The Mutations Tab shows the full protein sequence. The one shown by default is the canonical transcript ( mskcc or uniprot depending on configuration). The mutations are drawn on the lollipop based on the protein position found in the cBioPortal database. For the public cBioPortal all mutation data in MAF format are annotated using Genome Nexus to add the gene and protein change columns. This is then imported into the cBioPortal database. Whether you choose to use the set of uniprot or mskcc transcripts, make sure to indicate it in the [Genome Nexus Annotation Pipeline]( https://github.com/genome-nexus/genome- nexus-annotation-pipeline#maf-annotation)(--isoform-override mskcc or uniprot) when annotating as well as in the properties file of cBioPortal. That way the Mutations Tab will show the correct canonical transcript. 
Note that whenever somebody uses the dropdown on the Mutations Tab to change the displayed transcript, Genome Nexus re-annotates all mutations on the fly. The browser sends over the genomic location (chrom,start,end,ref, alt) to get the protein change information for each transcript. Since many of the annotations are for the canonical transcripts only we are currently hiding annotations for non-canonical transcripts."]},{"l":"Plans for default transcripts","p":["We are planning to move to a single set of default transcripts over time. Prior to v5 uniprot was used for the public facing portals and local installations. Our plan is to use mskcc everywhere and eventually we will most likely move to MANE. MANE is only available for grch38 and since most of our data is for grch37 this is currently not feasible. Whichever set of transcripts you choose to use, make sure to indicate so in the Genome Nexus Annotation Pipeline (--isoform-override mskcc or uniprot) and put the same set of transcripts in the properties file of cBioPortal, such that the Mutations Tab will show the correct canonical transcript (currently defaults to mskcc). The re-annotation of mutations only happens once a user clicks to change the transcript, which is why it's important that the protein change in the database is for the specific transcript displayed first."]}],[{"l":"Import OncoKB annotations as custom driver annotations","p":["The Annotation Configuration menu in Study View and Group Comparison is available only when custom driver annotations are present in the cBioPortal database for the genes in the study (or studies). In order to use OncoKB annotations to filter mutations and discrete copy number alteration in Study View and Group Comparison, OncoKB annotations can be added to the respective data files of a study prior to import into the database. This page describes how to import OncoKB annotations as custom driver annotations. 
It assumes the following requirements have been satisfied:","The cBioPortal software has been correctly built from source.","The user is able to successfully import a study into the database.","The study subjected to OncoKB import is confirmed to be valid."]},{"l":"Import of OncoKB annotations when loading a study","p":["OncoKB annotations can be added automatically to the study files when the study is loaded into the database by adding the --import_oncokb parameter to the metaImport.py script like so:","This will add OncoKB data to the mutation and discrete CNA files of a study, revalidate the results and load the study into the database.","The addition of mutation and discrete CNA files is explained in detail below."]},{"l":"Update of MAF file with OncoKB annotations","p":["OncoKB annotations can be added to the MAF file by running importOncokbMutation.py like so:","Where -s is the path to the directory of the MAF file and -u is the URL to a cBioPortal instance (needed for resolution of gene identifiers).","importOncokbMutation.py will add OncoKB annotations as custom driver annotation columns in the MAF file. The unmodified MAF file will be stored in the study directory with the ONCOKB_IMPORT_BACKUP_ prefix."]},{"l":"Update of Discrete Copy Number file with OncoKB annotations","p":["OncoKB annotations can be added to the Discrete Copy Number data by running importOncokbDiscreteCNA.py like so:","Where -s is the path to the directory of the Discrete Copy Number data file and -u is the URL to a cBioPortal instance (needed for resolution of gene identifiers).","importOncokbDiscreteCNA.py will create a custom driver annotation file with name data_cna_pd_annotation.txt in the study directory. It will add a pd_annotations_filename field in the CNA meta file that references the newly created custom driver annotation file. 
The unmodified CNA meta file will be stored in the study directory with the ONCOKB_IMPORT_BACKUP_ prefix."]}],[{"l":"Import Gene Sets in cBioPortal","p":["Gene sets are collections of genes that are grouped together based on higher level function or system characteristics, such as being part of the same molecular process or found to be co-regulated for example. Assessing gene sets in cBioPortal is useful when the user wants to visualize the number of mutations in sets of genes, or wants to see if all genes in a set are up- or down-regulated. To visualize gene set variation in a sample, the user can calculate scores per gene set per sample using the Gene Set Variation Analysis (GSVA) algorithm ( Hänzelmann, 2013).","Before loading a study with gene set data, gene set definitions have to be added to the database. These can be custom user-defined sets, or sets downloaded from external sources such as MSigDB. Additionally, a gene set hierarchy can be imported which is used on the cBioPortal Query page for selecting gene sets."]},{"l":"Quick example","p":["This example shows how the process of importing gene set data using test data.","Navigate to scripts folder:","Import gene sets and supplementary data: Note: This removes existing gene set, gene set hierarchy and gene set genetic profile data.","Import gene set hierarchy data:","Restart Tomcat if you have it running or call the /api/cache endpoint with a DELETE http-request(see here for more information).","Import study (replace argument after -u with local cBioPortal and -html with preferred location for html report):"]},{"l":"Requirements for gene sets in cBioPortal","p":["Gene set functionality was added in cBioPortal 1.7.0. Please use this or a later version. In addition, the database has to be updated to version 2.3.0 or higher, depending on the cBioPortal version. 
This can be done by running the python wrapper migrate_db.py for migration.sql.","Updating the database is described here."]},{"l":"Import Gene Sets"},{"l":"File formats","p":["Once you have initialized MySQL with cBioPortal database, it is possible to import gene sets. The format of the gene set data file is the Gene Matrix Transposed file format (.gmt). This format is also used by the MSigDB, which hosts several collections of gene sets on: https://software.broadinstitute.org/gsea/msigdb/","Sample of .gmt file:","GMT files contain a row for every gene set. The first column contains the EXTERNAL_ID or stable id(MsigDB calls this \"standard name\"), e.g. GO_POTASSIUM_ION_TRANSPORT, not longer than 100 characters. The second column contains the REF_LINK. This is an optional URL linking to external information about this gene set. Column 3 to N contain the Entrez gene IDs that belong to this gene set.","Additional information can be placed in a supplementary file. This file should be a .txt, containing columns for the stable id, the long name (max 100 characters) and description of the gene set (max 300 characters).","Sample of supplementary .txt file:"]},{"l":"Run the gene set importer","p":["The importer for gene sets can be run with a perl wrapper, which is located at the following location and requires the following arguments:","The --new-version argument with a Version parameter is used for loading new gene set definitions. It is not possible to add new gene sets or change the genes of current gene sets, without removing the old gene sets first. This is to prevent the user from having gene sets from different definitions and data from older definitions. The user can choose the name or number of the Version as he likes, e.g. msigdb_6.1 or Oncogenic_2017. Running the script with --new-version removes all previous gene sets, gene set hierarchy and gene set genetic profiles. A prompt is given to make sure the user wants to do this. 
Note that it is possible to enter the same version as the previous version, but previous data is removed nevertheless.","The --update info can be used to update only the long name, description and reference URL."]},{"l":"Import Gene Set hierarchy","p":["After importing gene sets, you can import a gene set hierarchy that is used on the query page to select gene sets."]},{"l":"File format","p":["For gene set hierarchy files, we use the YAML format. This is a common format to structure hierarchical data.","Sample of format (note this is mock data):","To make your own hierarchy, make sure every branchname ends with :. Every branch can contain new branches (which can be considered subcategories) or gene sets (which are designated by the Gene sets: statement). The gene set names are the stable ids imported by ImportGenesetData.java and should start with -."]},{"l":"Running the gene set hierarchy importer"},{"l":"Import a study with gene set data","p":["Gene set data can be added to a study folder, after which the whole study can be imported with metaImport.py. cBioPortal supports GSVA Scores and p-values (from bootstrapping) calculated using Gene Set Variation Analysis (GSVA, Hänzelmann, 2013). A description of GSVA study data can be found in the cBioPortal File Formats documentation."]},{"l":"References","p":["GSVA: gene set variation analysis for microarray and RNA-Seq data Sonja Hänzelmann, Robert Castelo and Justin Guinney, BMC Bioinformatics, 2013 https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-7 https://www.bioconductor.org/packages/release/bioc/html/GSVA.html","Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles Aravind Subramanian, Pablo Tamayo, Vamsi K. Mootha, Sayan Mukherjee, Benjamin L. Ebert, Michael A. Gillette, Amanda Paulovich, Scott L. Pomeroy, Todd R. Golub, Eric S. Lander, and Jill P. 
Mesirov, PNAS, 2005 https://www.pnas.org/content/102/43/15545 https://software.broadinstitute.org/gsea/msigdb"]}],[{"l":"Import Gene Panels","p":["This page describes how to import a gene panel into the cBioPortal database. It assumes the following requirements have been satisfied:","The cBioPortal software has been correctly built from source.","The gene panel to import is in the proper file format. See Gene Panel File format for more information.","The PORTAL_HOME environment variable has been properly defined. See Loading a Sample Study for more information."]},{"l":"Gene panel file format","p":["The gene panel file follows the format of a meta file with the following fields:","stable_id: The name of the gene panel. This should be unique across all studies, as gene panels can be globally applied to any sample and any genetic profile.","description: A description of the gene panel.","gene_list: Tab separated genes, represented either by all gene symbols or all Entrez gene IDs.","An example gene panel file would be:"]},{"l":"Import command","p":["In this example, we are loading the example gene panels which resides in the sample dataset study_es_0.","After loading gene panels into the database, please restart Tomcat or call the /api/cache endpoint with a DELETE http-request(see here for more information) so that the validator can retrieve gene panel information from the cBioPortal API."]},{"l":"Update existing gene panel","p":["If a gene panel exists in the database with the same name as the one being imported, and there exists cancer study data that refers to this gene panel, the ImportGenePanel command will abort. In order to reimport the gene panel in this situation, run the UpdateGenePanel command.","If the incoming gene panel is the same as the original gene panel, whether through importing or updating, then no changes shall be made to the gene panel. If the incoming gene panel is empty, then the script will abort. 
Genes in the incoming gene panel that were not in the original shall be added to the existing gene panel. Conversely, genes not in the incoming gene panel that were in the original shall be removed from the existing gene panel. The UpdateGenePanel command will prompt twice to confirm changes made to the gene panel, such as genes to be added or removed."]}],[{"l":"Study View Customization"},{"l":"How does the study view organize the charts","p":["The study view page is fully responsive; it will try to fit as many charts as possible based on the browser's width and height.","The layout of a chart is determined mainly based on priority. A higher priority will place a chart closer to the top left.","In order to improve the layout, we added a layout algorithm layer. The study view page is using a grid layout. All charts will be put into a 2-dimensional system. For example, a pie chart, by default, takes 1 block and a bar chart uses two blocks. All charts will be placed from left to right, top to bottom. In order to prevent misalignment, we promote small charts to fit into the space.","For logged-in (authenticated) users, the chart layout is saved to the user's profile, i.e., whenever the user revisits the same URL, the previously saved layout will be loaded."]},{"l":"Study View Customization with Priority Data","p":["Example of study view in public portal: https://www.cbioportal.org/study?id=acc_tcga,lgg_tcga#summary"]},{"l":"Priorities","p":["20","200","2000","30","300","3000","40","400","70","8","80","9","90","Additional Info","AGE","Cancer Studies","CANCER_TYPE","CANCER_TYPE_DETAILED","Chart name(clinical attribute ID)","CNA Bar Chart","CNA Genes Table","Currently, we preassigned priority to a few charts, but as long as you assign a priority in the database other than 1, these preassigned priorities will be overwritten.","Disease Free Survival Plot","Frontend default priority","GENDER, SEX","Mutated Genes Table","Mutation Count Bar Chart","Mutation Count vs. 
Fraction of Genome Altered Density Plot","Number of Samples Per Patient","Overall Survival Plot","The default score is 1.","The priority system is represented with a final score. The higher the final (numeric) score, the higher the priority assigned.","This is a combination of DFS_MONTH and DFS_STATUS","This is a combination of OS_MONTH and OS_STATUS","To disable the chart, set the priority to -1. (Currently disables charts for single clinical attributes only)","To promote a certain chart in study view, please increase its priority in the database to a certain number. The higher the score, the higher the priority with which it will be displayed in the study view. If you want to hide a chart, please set the priority to 0. For a combination chart, as long as one of the clinical attributes has been set to 0, it will be hidden."]}],[{"l":"Updating your cBioPortal installation","p":["As of release 1.1.0 cBioPortal has a Database schema update mechanism which comes into play whenever the new version of the portal code relies on specific DB schema changes to be applied. The portal will automatically check if the DB schema is according to what it expects. It does so by comparing the version number of the portal code with the version number of the DB schema. If they are equal, it assumes the DB schema has been upgraded. If not, it will require the administrator to run a migration script. Below are the steps to ensure your DB schema is updated correctly."]},{"l":"First time","p":["The first time you update from release 1.0.4 (or lower) to release 1.1.0 (or higher), you should get an error banner page after restarting your webserver. The error should state something like:","where xxx and yyy will be different version numbers.","If you get DB version expected by Portal: 0 (i.e. you are building the new release from source), you need to add a new property to your portal.properties file which is needed for this check."]},{"l":"Step1","p":["In your portal.properties file (e.g. 
your_cbioportal_dir/src/main/resources/portal.properties) add the following property:"]},{"l":"Step2","p":["Compile your code again. After restarting the webserver the page should now state something like: DB version expected by Portal: 1.1.0(or higher), while the DB version remains as Current DB version: -1."]},{"l":"Running the migration script","p":["First, make sure you have the DB connection properties correctly set in your portal.properties file (see DB connection settings here).","Dependencies: the migration script is a Python script that depends on the mysqlclient library. If necessary, you can install it with the following commands (example for Ubuntu):","For macOS, try the following:","and see https://github.com/PyMySQL/mysqlclient-python/blob/master/README.md#prerequisites if problems occur during installation.","To run the migration script first go to the scripts folder your_cbioportal_dir/core/src/main/scripts and then run the following command:","This should result in the following output:","Final step: Restart your webserver or call the /api/cache endpoint with a DELETE http-request(see here for more information)."]}],[{"l":"Updating the gene names and aliases tables","p":["This manual is intended for users that have knowledge about the structure of the cBioPortal seed database.","When loading studies into cBioPortal it is possible for warnings to occur that are caused by an outdated seed database. Gene symbols can be deprecated or be assigned to a different Entrez Gene in a new release. Also Entrez Gene IDs can be added. 
This markdown explains how to update the seed database, in order to use the most recent Entrez Gene IDs.","The cBioPortal scripts package provides a method to update the gene and gene_alias tables."]},{"l":"Prepare"},{"l":"Human genes","p":["Download gene_info.txt Generated based on latest HGNC release using script HERE"]},{"l":"Mouse genes","p":["Download Mus_musculus.gene_info.gz from ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz Unzip the downloaded file with the command gunzip Mus_musculus.gene_info.gz"]},{"l":"MySQL steps","p":["Execute these steps in case you want to reset your database to the most recent genes list from NCBI.","1- Start a new MySQL database with the previous seed database, which can be found on cBioPortal Datahub for human and mouse.","2- If DB engine supports foreign key (FK) constraints, e.g. InnoDB, drop constraints:","3- Empty tables gene and gene_alias","4- Restart cBioPortal (restart webserver) or call the /api/cache endpoint with a DELETE http-request(see here for more information) to clean-up any cached gene lists.","5- To import gene data type the following commands when in the folder cbioportal_source_folder/core/src/main/scripts:"]},{"i":"human-genes-1","l":"Human genes"},{"i":"mouse-genes-1","l":"Mouse genes","p":["IMPORTANT NOTE:","The reference_genome table needs to be populated before updating the gene table. 
Further details can be found in this document.","Use --species option when importing genes for a species other than human","Use the gene table if you query information such as hugo symbols, types of the gene","Use reference_genome_gene table if you query information such as chromosome, cytoband, exonic length, or the start or end of the gene","Load genes only to the reference_genome_gene table without updating the gene table, please use the following command:"]},{"i":"human-genes-2","l":"Human genes"},{"i":"mouse-genes-2","l":"Mouse genes","p":["6- ⚠️ Check the gene and gene_alias tables to verify that they are filled correctly.","7- Additionally, there are other tables you may want to update now (only in human).","Updating the COSMIC coding mutations, can be downloaded from here and require the script importCosmicData.pl","8- Clean-up old data:","9- If DB engine supports FK constraints, e.g. InnoDB, restore constraints:","10- You can import new gene sets using the gene set importer. These gene sets are currently only used for gene set scoring. See Import-Gene-Sets.md and File-Formats.md#gene-set-data.","For example, run in folder cbioportal_source_folder/core/src/main/scripts:","Please make sure the version gene sets is the same as the version used to calculate gene set scores in your data."]}],[{"l":"Migration Guide","p":["This page describes various changes deployers will need to make as they deploy newer versions of the portal. -"]},{"i":"v53---v54","l":"v5.3 -> v5.4","p":["Remove db.host and db.portal_db_name and db.use_ssl properties from the portal.properties file or JVM parameters. Update property db.connection_string to encode the hostname, port, database and other parameters according to Database Settings documentation and pass via portal.properties file or as JVM parameter."]},{"i":"v4---v5","l":"v4 -> v5","p":["All fusion profiles are now required to be migrated to structural variant format. 
One can use this migration tool to migrate the fusion files.","All fusion files on datahub were migrated to the structural variant format and their molecular profile ids were renamed from {study_id}_fusion to {study_id}_structural_variants. If you are using these datahub files one would need to re-import them.","Study view user setting will be outdated after migration, please follow Clear Study View User settings section in Session Service Management","The new default set of transcripts for each gene has changed from uniprot to mskcc. See the Mutation Data Annotation Section for more details. To keep the old set of default transcripts add the property genomenexus.isoform_override_source=uniprot to the properties file.","See the v5.0.0 release notes for more details."]},{"i":"v3---v4","l":"v3 -> v4","p":["Introduces logback package for logging. If you don't have any custom log4j.properties file, no changes are necessary","Cleans up several old databases ( PR). In theory the migration should be seamless, since the docker container detects an old database version and migrates it automatically.","See the v4.0.0 release notes for more details."]},{"i":"v2---v3","l":"v2 -> v3","p":["Session service is now required to be set up. You can't run it without session service. The recommended way to run cBioPortal is to use the Docker Compose instructions."]},{"i":"v1---v2","l":"v1 -> v2","p":["Changes cBioPortal to a Single Page App (SPA) written in React, Mobx and bootstrap that uses a REST API. 
It shouldn't change anything for a deployer."]}],[{"i":"msk-maintenance-in-progress","l":"MSK Maintenance (In Progress)","p":["We provide our cBioPortal's maintenance page publicly in the hope that it may be useful to others"]},{"l":"Database Migration","p":["In the database migration process, we are going to have two main steps: building importers and updating database scheme."]},{"l":"Building Importers","p":["(Optional) Remove existing jars","[--build|-b=], build_specifier should be one of the following","[--cbioportal-git-hash|-cgh=]","[--skip-deployment|-sd=]","[--skip-git-pull|-sgp=]","all (build for all artifacts)","Available parameters:","Build all importers at once (build all importers except cmo-pipelines)","Build importer","Build multiple importers","Building single importer:","cbioportal","cd /data/portal-con/git-repos/pipelines","cd /data/portal-cron/git-repo/pipelines-configuration/build-importer-jars","cmo-pipelines (cmo-pipelines artifacts only)","Codebases:","Copy importer properties to /data/portal-con-git-repos/cbioportal/src/main/resources(e.g. 
triage /data/portal-cron/git-repos/pipelines-configuration/properties/import-triage/*)","genie-archive-importer","genie-aws-importer","genome nexus annotation pipeline","Go to build importer jars folder","hgnc-importer","importers (all importers except cmo-pipelines)","Login to pipelines server","Make sure the local cbioportal codebase is on the correct git hash at /data/portal-cron/git-repos/cbioportal","msk-cmo-importer","msk-dmp-importer","mvn clean install -DskipTests","Navigate to pipelines folder","pipelines","public-importer","rm *.jar","Run export PORTAL_HOME=/data/portal-cron/git-repos/cbioportal","Run sh /data/portal-con/scripts/automation-environment.sh","Sets general env variables needed for building","sh buildproductionjars.sh -sd=true -sgp=true -b=importers","Specific to cbioportal build, looks for properties and xml files off of $PORTAL_HOME","Specify importer to be build","Take the cbioportal hash and add it to genome nexus annotation pipeline POM -> push to fork","Take the cbioportal hash and new genome nexus annotation pipeline hash (from above) and propagate to pipelines POM; also update db version if db migration is part of change","triage-cmo-importer"]},{"l":"Updating Database Scheme","p":["Database needs to be updated one by one, we have four main databases: triage, private, genie, and public. Take triage database as an example.","Migrate one database (e.g. 
triage)","SSH into pipeline server","Checkout to the commit that contains the latest database scheme","Check if property sets up correctly to the right database (triage)","vi /data/portal-con-git-repos/cbioportal/src/main/resources/portal.properties","Move to directory","cd /data/portal-con-git-repos/cbioportal","Run database migration using script:","python3 core/src/main/scripts/migrate_db.py --properties-file src/main/resources/portal.properties --sql db-scripts/src/main/resources/migration.sql","Monitor the DB migration process and look for possible errors","Access database and verify the DB scheme is updated"]}],[{"l":"Development","p":["cBioPortal is an open source project hosted on Github. The core team will consider pull requests from any source.","The following section describes how to set up a development environment and start contributing.","Note that cBioPortal is split into a backend project (api) and a frontend project which are kept in separate repositories.","Frontend(Typescript, React, Mobx)","Backend(Java, Spring, MySQL, MongoDB)","If you are interested in coordinating the development of new features, please contact cbioportal@cbioportal.org or reach out on https://slack.cbioportal.org."]}],[{"l":"Feature Development Guide","p":["This is a guide for developers that are implementing a new feature."]},{"l":"Before Implementation","p":["As a first step it is important to determine how complex the proposed feature is. Incremental improvements on existing features are often easier to accomplish and require input from fewer people. Most minor changes can be submitted as a Pull Request. If the proposed feature would require one or more days of work it makes sense to connect on slack to discuss the idea. 
For more complex new features that require weeks of work or more, it is best to get input from several people in the cBioPortal community, including people with a deep understanding of the cBioPortal product and its users as well as the engineers that write the software. In that case we often start out with a Request For Comments document that describes the feature in more detail, see our list of RFCs for some examples. The community can then help guide the feature development in the right direction.","During this process you will most likely receive some pointers which part of the stack you will be editing (see Architecture Overview). This will be helpful when actually starting your implementation and figuring out how to set up your development environment. For many features it is not necessary to understand all parts of the stack, so seeking out advice on this is highly recommended.","Before you start implementing a more complex feature, ideally many of these things are clear:","Who can you contact for help?","Who will be helping to review the code?","What part of the stack will you work on?","Gold stars if you already start thinking about:","How do we release the feature incrementally?","When is feature development done?","See more thoughts about these topics further below"]},{"l":"Starting Implementation","p":["Once you are ready to start implementing, the first thing is to set up the development environment. We strive to make this as easy as possible, but it can often still take some time so definitely reach out if you run into issues. If you haven't submitted a Pull Request to cBioPortal before, it might make sense to look at some good first issues before starting with your feature. This will help to get some familiarity with the process of proposing a change, getting it reviewed, making edits and getting it to production. 
Don't pick anything too complicated for a first issue; it could be as simple as fixing some typos in the README."]},{"l":"During Implementation","p":["The most important part during implementation is communication. Continue getting feedback as your implementation evolves. One of the best ways to do this is to fully integrate into the development team while you work on a feature. Anybody is welcome to join our weekly planning meeting (Tuesdays 11AM-12PM Eastern Time) and our daily standups 2.30-3PM Eastern Time. Please reach out on slack to get an invite. If these times are not ideal or you're working on the feature more sporadically then it's totally fine to skip them."]},{"l":"Plan to release to production early","p":["Don't wait until the feature is fully finished to get feedback from the product team and the engineering team. Think about ways we can release a portion of the feature to production without finishing the entire thing. We have found feature flags to work well here. Instead of using long running feature branches we try to add a configuration property that allows us to turn the feature on or off. That way portions of the code can be released to production early on. We want to avoid working on some piece of code for more than a week or so without being able to release it. For instance: if one is trying to add some new tab on the Patient View Page, one could start with adding the on/off configuration switch for this tab. That could be released to production relatively quickly."]},{"l":"Regression Testing","p":["Make sure to think about ways to incorporate testing for your feature. We have an extensive suite of unit, integration and end to end tests including automated browser testing that mimics user interaction. 
Adding some regression tests will make sure the feature won't break with new versions of cBioPortal."]},{"i":"when-is-feature-development-done","l":"When is feature development done?","p":["There are many stages in feature development:","Design","Implementation","Review","Production Deployment","Production Usage Monitoring","The process is hardly ever a linear line and it can move back and forth between any stage. This is expected and one of the reasons why time estimations are notoriously hard. An additional note is that feature development usually isn't done at the moment it gets merged to the main branch and deployed, but rather only after a few weeks of using it in production and not identifying any new issues."]}],[{"l":"cBioPortal ER Diagram","p":["cBioPortal ER Diagram - PDF Version"]}],[{"l":"cBioPortal Database Versioning","p":["Add a constraint","Add a new column to the table","Create a new table","Delete a table","Delete data from a table:","Drop a constraint","Drop column c from the table","Insert multiple rows into a table","MAJOR: A non-backward compatible significant change in the database. Which requires the maintainer to reload and re-import all studies in the database entirely.","MINOR: Including deleting, renaming tables or columns, or changing constraints","PATCH: Changes that don't change existing database schemes but add new tables or columns, manipulating data.","Remove all data in a table","Rename a table from t1 to t2","Rename column c1 to c2","Update data in a table:","We follow the following logic when deciding how/when to increment the version of cBioPortal database. 
It's a complete modification of semantic versioning (MAJOR.MINOR.PATCH) more suitable for our purposes:"]}],[{"l":"Build with different frontend versions","p":["Maven will build cBioPortal with a cBioPortal-frontend version and git repository url as determined by respectively the frontend.version and frontend.groupId parameters in the root POM.xml.","To build cBioPortal with a different frontend version different values for frontend.version and frontend.groupId parameters can be specified as part of the maven install command. For example:","Remarks:","The frontend.version parameter allows release tags (e.g. 'v2.1.0') and commit sha-hashes (e.g., '93d9cbcb').","The frontend.groupId is a reversed, dot-separated derivative of the git url. Git repository location github.com/cbioportal is represented by the com.github.cbioportal groupId."]}],[{"l":"Session Service"},{"l":"1. Steps to add new Session type through session-service","p":["Define new type at SessionType.java.","Add necessary tests if required.","Update session-service documentation."]},{"l":"2. cBioPortal Backend"},{"i":"21-configuring-session-service","l":"2.1 configuring session-service","p":["Here are the properties that need to be set"]},{"i":"sessionserviceurl-format","l":"session.service.url format"},{"l":"example"},{"i":"22-updates-to-code","l":"2.2 Updates to code","p":["Update session-service dependency version in pom.xml.","Add/Update api's in SessionServiceController.java to support new session type.","Sometimes we might need to define a model for the session. Check how session class is extended in VirtualStudy and how it is consumed in SessionServiceController.java."]},{"l":"3. cBioPortal Frontend","p":["Client code for cbio session service is in https://github.com/cBioPortal/cbioportal-frontend/blob/master/src/shared/api/sessionServiceAPI.ts. Update it accordingly when a new api is added in cBioPortal backend SessionServiceController.java"]},{"l":"4. 
Local development"},{"i":"41-expose-database-port","l":"4.1 Expose database port","p":["If docker-compose.yml is used for running session service and if you like to access mongodb directly then uncomment this block of code"]},{"i":"42-test-results-with-curl-in-terminal","l":"4.2 Test results with cURL in terminal","p":["After you have local cBioPortal instance running, you can use cURL to test session-service endpoints.","Example for GET methods:","Example for POST methods:","Note on Cookie: You can log in to your account in your local cBioPortal instance, after login, copy cookies from requests in network tab. See login configuration for more information."]}],[{"l":"Manual test cases","p":["Linked below is a Google Doc with some manual test cases you might want to look at when developing, and deploying new changes. Note that this list is a work in progress and subject to change.","https://docs.google.com/document/d/1IniMNrrSEGsPRtkaU3Vbcm6sMgcOhytBurIRg0SqXgA/edit?usp=sharing"]}],[{"l":"Release Procedure","p":["We have release procedures for the following scenarios:","cBioPortal community release of code already in production","Release with database migration"]},{"l":"cBioPortal community release of code already in production","p":["We often run code in production that is not ready yet for use by the wider cBioPortal community. We deploy to production what's in the master branch of the backend repo and the frontend repo. Often times this is not a tagged release. At some point this code should be released for the wider community. These are the steps we follow:","Create a new frontend tag. The releases can be found here: https://github.com/cBioPortal/cbioportal-frontend/releases. A draft of the release notes are automatically generated by https://github.com/marketplace/actions/release-drafter. If there are pull requests in the Changes section i.e. they have not been labeled with one of the labels defined here. 
Try to label them and trigger a rerun by committing something to the master branch. Alternatively you can manually put them in a particular section. Note that our goal is to have automated release notes, so it would be great if you could send a PR to update the release-drafter.yml in case you find certain PRs don't fit in a particular section or a section should be altered. Look at other release notes for inspiration: https://github.com/cBioPortal/cbioportal-frontend/releases. You can save your work as a draft if necessary.","Once the frontend code is tagged, create a pull request to the backend repo where the frontend version is incremented in portal/pom.xml:","Once that PR is merged, one can create a tag for the backend repo with the same tag as the frontend repo. You should see a draft from release drafter similar to the frontend in the backend repo: https://github.com/cBioPortal/cbioportal/releases. The idea is to create one set of release notes in the backend repo that is a combination of the frontend and backend notes. To make the hyperlinks from the frontend repo work in the backend repo you can copy the frontend release notes raw markdown and run the following one liner to convert the links:","then put them in the right sections following same style as other releases: https://github.com/cBioPortal/cbioportal-frontend/releases.","Create a news item with a link to your carefully crafted release notes. Highlight a few major changes that could be interesting to users of cBioPortal ideally with a screenshot similar to: https://github.com/cBioPortal/cbioportal/pull/6914/files?short\\_path=6f95322#diff-6f953229832059bab3fe229d4af08b52(in the files changed section, you can click on view rich diff to see the converted markdown)."]},{"l":"Release with database migration","p":["For releases with database migrations, we increase the MINOR number in MAJOR.MINOR.PATCH. 
For those releases we have a separate branch (see https://github.com/cBioPortal/cbioportal/blob/master/CONTRIBUTING.md#branches-within-cbioportal), which needs to be merged to master on both backend and frontend:","Make sure no auto deployment is running for frontend from netlify","Merge frontend release-x.y.z branch to frontend master","Follow same procedure as for a PATCH release, but instead of having a separate PR to update the frontend (step 2) one can add it to the already existing backend branch release-x.y.z and open the PR from there to backend's master. This is merely for convenience to avoid having to create another branch just to update the frontend version."]},{"l":"A note on versioning"},{"l":"From pre-release to official release","p":["On the GitHub Release Page you will see that some releases have the pre-release indication whereas others do not. In general we make a new pre-release release every week. We test it out in production ( https://cbioportal.org) for one month and if no new critical issues are identified we make it an official release. Occasionally we make a new official release in less than a month's time if we identify a critical issue in the previous release."]},{"l":"cBioPortal Software Version Numbers","p":["We follow the following logic when deciding how/when to increment the version of cBioPortal. It's a complete modification of semantic versioning (MAJOR.MINOR.PATCH) more suitable for our purposes:","MAJOR : A big change in how cBioPortal works. We changed the major version from 1 to 2 when we completely moved from using JSPs to a Single Page App written in React calling a REST service. Another example: we changed from 2 to 3 when we made session-service a requirement.","MINOR : Changes that require a database migration, an upgrade to another cBioPortal component (e.g. session-service) or anything that could require additional effort for a deployer of cBioPortal (e.g. 
see transcript change).","PATCH : Changes that don't require database migrations. Could be new features as well as bug fixes to either frontend, backend or both.","See also: https://github.com/cBioPortal/cbioportal/releases"]},{"l":"cBioPortal Database Version Numbers","p":["cBioPortal database version numbers are different version numbers than the software version numbers, it's only updated when developers make database scheme changes, please see cBioPortal Database Versioning"]},{"l":"User announcements","p":["The following is a provisional system of alerting user to new features and announcements. It would probably be better for these messages to be configurable on an instance level by installers. For now, the following suffices.","Top banner: see sample configuration in src/shared/components/userMessager/UserMessage.tsx","For a beacon and associated dialog message, use this as a model, where child of InfoBeacon component is any component that will be shown when InfoBeacon is moused over and conditions are met"]}],[{"l":"Deployment Procedure","p":["This describes our internal deployment procedure. Shared publicly, in case it may be of use. Instructions on how to deploy cBioPortal can be found elsewhere, see e.g. Deploying the web application and Deploy using Docker.","We deploy the master branch of backend and the master branch of frontend to production. The public portal ( https://www.cbioportal.org) runs on AWS inside kubernetes. The configuration can be found in the knowledgesystems repo:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment","Other portals run at MSKCC on two internal machines called dashi and dashi2. Since we're running several apps in several tomcats internally the procedure for updating them is different from the public portal on AWS. The configuration is in the mercurial portal-configuration repo. To make changes, ask Ben for access.","The frontend and backend can be upgraded independently. 
We have the following events that can require a new deployment:","New frontend commit in master","New backend commit in master"]},{"l":"New frontend commit in master","p":["Currently we are auto-deploying the frontend master branch to netlify: https://frontend.cbioportal.org. So any change should be automatically built and deployed to the relevant portals if the frontend configuration has been set up properly. Do note that the current build time for the frontend project is ~ 15 minutes or so. To see what frontend commit is deployed, check window.FRONTEND_COMMIT in the console of the browser."]},{"l":"Public Portal Frontend URL","p":["The public portal is on AWS and running inside a Kubernetes cluster. The URL that it gets the frontend version from is here:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment/search?q=-Dfrontend.url&unscoped_q=-Dfrontend.url","This should be a URL pointing to netlify."]},{"l":"Internal Portal Frontend URL","p":["For the internally running portals the frontend.url is defined in the portal.properties file in the mercurial portal-configuration repo. If set up correctly, this should point to a file on both dashi and dashi2 that in turn points to a netlify frontend URL. The reason we have a separate file with the URL in it is that it allows us to update the frontend URL without redeploying the backend."]},{"l":"New backend commit in master","p":["A new backend commit usually also means a new frontend change is necessary. For this reason the following sections assume that's the case."]},{"l":"Public Portal Backend Upgrade","p":["Once the backend repo has been tagged on github, a docker image gets built on Docker Hub automatically. It can take ~ 5 min before the image is available. 
You can check here what the status of the builds is: https://github.com/cBioPortal/cbioportal/actions?query=workflow%3A%22Docker+Image+CI%22.","After that, if you have access to the kubernetes cluster you can change the image in the configuration of the kubernetes cluster:","https://github.com/knowledgesystems/knowledgesystems-k8s-deployment/blob/master/cbioportal/cbioportal_spring_boot.yaml","point this line to the new tag on docker hub e.g.:","Make sure it is an image with the postfix -web-shenandoah. This is the image that only has the web part of cBioPortal and uses the shenandoah garbage collector.","Also remove the -Dfrontend.url parameter such that the frontend version inside the war will be used:","Then running this command applies the changes to the cluster:","You can keep track of what's happening by looking at the pods:","If you have the watch command installed you can also use that to see the output of this every 2s:","Another thing to look at is the events:","If there are any issues, point the image back to what it was, set -Dfrontend.url and run kubectl apply -f filename again.","If everything went ok, you can re-enable auto deployment on netlify, set -Dfrontend.url in the kubernetes file and run kubectl apply -f filename again.","Make sure to commit your changes to the knowledgesystems-k8s-deployment repo and push them to the main repo, so that other people making changes to the kubernetes config will be using the latest version."]},{"l":"Private Portal Backend Upgrade","p":["First update the frontend portal configuration to point to a new file. It's fine if this file does not exist yet, because if it doesn't the frontend bundled with the war will be used. We can later point the file to netlify, once we've determined everything looks ok.","You can use this for loop to update the frontend url in all properties files (set it to a file that doesn't exist yet and give it a sensible name e.g. 
frontend_url_version_x_y_z.txt):","Same for triage-tomcat (again set the correct file name):","Make sure you see the frontend url file updated correctly:","Then commit and push your changes to the mercurial repo:","If you have your public key added for the relevant deploy scripts you should be able to deploy with the following command on dashi-dev:","If you don't have an SSH key set up to run the deploy script ask Ino.","If everything looks ok you can update the frontend url file to point to netlify. Log in to dashi and become msk-tomcat with sudo su - msk-tomcat. Then change the update script:","to point oldurlfile=/srv/www/msk-tomcat/frontend_url_version_2_0_0.txt to the new frontend url file you supplied above.","Then update the url like:","Do the same thing on dashi2.","The last step is to modify the frontend url file for the triage portal. Log in to the pipelines machine, log in as triage-tomcat user: sudo su - triage-tomcat, and update the frontend url file there:"]},{"l":"Upgrading Related Backend Components","p":["Backend upgrades involving the database schema, DAO classes, etc. require updates to databases and importers. cBioPortal has multiple databases (located both internally on pipelines and in AWS) backing different portals. Similarly there are multiple importers responsible for loading portal-specific data. Every database must be manually migrated on an individual basis; all importers/data fetchers can be updated simultaneously through an existing deployment script.","Before upgrading, make sure to turn off import jobs in the crontab and alert the backend pipelines team (Avery, Angelica, Rob, Manda).","To access the crontab, log in to pipelines, log in as cbioportal_importer: sudo su - cbioportal_importer, and run crontab -e. Comment out any lines that run import jobs, save, and exit. Make sure to uncomment these lines once the upgrade (database and importers) is complete. 
Lines that need to be commented out will be under the Import Jobs section, shown here."]},{"l":"Updating Databases","p":["AWS","cbioportal.mskcc.org","cbioportal.org","cgds_gdac","cgds_genie","cgds_public","cgds_triage","Database","First, make sure there is a backup of the database being migrated. If there is not a weekly dump, backup the database being migrated using mysqldump. This process may take awhile depending on the size of the database.","genie.cbioportal.org","Location","pipelines","The second step is to migrate the database. Make sure that the migration script is the same version as the deployed cBioPortal website. It is recommended to first test the migration script manually line-by-line in a copy of the existing database. This will catch any data-related bugs that might not be captured by the python migration script. After testing is successful, migrate the production databases following these steps here.","These are all cBioPortal databases and their locations:","To obtain information such as usernames, passwords, hostnames - ask Avery, Angelica, Rob, Manda, and Ino.","triage.cbioportal.org","Website"]},{"i":"updating-importersdata-fetchers","l":"Updating Importers/Data Fetchers","p":["Importers (code found here) and data fetchers (code found here) use code from the cBioPortal codebase. The cbioportal dependency is packaged with the genome-nexus-annotation-pipeline and specified in the pipelines importer pom.","The following steps are used during releases/updates to build new importers with the most-up-to-date cBioPortal and genome-nexus-annotation-pipeline code. 
All steps should be performed on the pipelines machine.","Set the jitpack hash here in the genome-nexus-annotation-pipeline codebase to the most recent cbioportal/cbioportal commit hash in master.","Merge this change into genome-nexus-annotation-pipeline/master.","Set the commit hash here in the pipelines codebase to the most recent genome-nexus/genome-nexus-annotation-pipeline commit hash (after merge specified in step 2). Also ensure the db version in the pom here matches the db schema version in the cbioportal codebase.","Merge this change into pipelines/master.","Set the commit hash here in the cmo-pipelines codebase to the most recent genome-nexus/genome-nexus-annotation-pipeline commit hash (after merge specified in step 2)","Merge this change into cmo-pipelines/master","Run the deployment wrapper script. See details here.","Verify new importers/data fetchers have been placed in /data/portal-cron/lib by checking timestamps."]},{"l":"Deployment Script","p":["The wrapper script is found on pipelines here:/data/portal-cron/git-repos/pipelines-configuration/build-importer-jars/buildproductionjars.sh.","Run git pull to pull in any updates to the build script.","The wrapper script takes two arguments:","--cbioportal-git-hash (required): Set to the cBioPortal commit hash being used in the pipelines build (hash specified in step 1 of updating importers). This must match because the build copies out resource files (e.g. application-context-business.xml) from the cbioportal codebase.","--skip-deployment (optional): Set to true to skip auto-deployment to /data/portal-cron/lib. 
Built jars will be found in /data/portal-cron/git-repos/pipelines-configuration/build-importer-jars/ and can be manually moved.","The wrapper script will automatically back up the importers/data-fetchers to /data/portal-cron/lib/backup."]}],[{"l":"Documentation site","p":["This documentation site is created using https://retype.com/, a static site generator based on markdown.","Visit their site for installation instructions and a guide on how","Develop, build and test the site locally","Markdown syntax","Deploy to Github pages using Github actions."]},{"l":"Navigation","p":["Please note that the navigation for the site is defined in docs/SUMMARY.md. This is a deprecated form of configuration which is not documented in Retype."]}]]
\ No newline at end of file
diff --git a/rfc-list/index.html b/rfc-list/index.html
index f26f1419a60..1370ce8c9b8 100644
--- a/rfc-list/index.html
+++ b/rfc-list/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/session-service-management/index.html b/session-service-management/index.html
index 568c10a42b7..2d628eac1b9 100644
--- a/session-service-management/index.html
+++ b/session-service-management/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 71f7e6b4c38..ac99e9505f0 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ
diff --git a/software-acknowledgments/index.html b/software-acknowledgments/index.html
index b96a9ce9e7a..42f340cc167 100644
--- a/software-acknowledgments/index.html
+++ b/software-acknowledgments/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/study-curation-guide/index.html b/study-curation-guide/index.html
index eaec5b3ac12..837404cc867 100644
--- a/study-curation-guide/index.html
+++ b/study-curation-guide/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/testing/index.html b/testing/index.html
index 6f46bad2c54..3b6c8c20169 100644
--- a/testing/index.html
+++ b/testing/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/uninstall-docker-cbioportal/index.html b/uninstall-docker-cbioportal/index.html
index 8404a276cc4..2142a6f5912 100644
--- a/uninstall-docker-cbioportal/index.html
+++ b/uninstall-docker-cbioportal/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/updating-gene-and-gene_alias-tables/index.html b/updating-gene-and-gene_alias-tables/index.html
index 8eef88abff8..446b9b7e5da 100644
--- a/updating-gene-and-gene_alias-tables/index.html
+++ b/updating-gene-and-gene_alias-tables/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/updating-your-cbioportal-installation/index.html b/updating-your-cbioportal-installation/index.html
index 1dce7bcc5b2..6ea8164f5ea 100644
--- a/updating-your-cbioportal-installation/index.html
+++ b/updating-your-cbioportal-installation/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/user-guide/by-page/index.html b/user-guide/by-page/index.html
index be8f7898e83..c20eb09f5bd 100644
--- a/user-guide/by-page/index.html
+++ b/user-guide/by-page/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/user-guide/faq/index.html b/user-guide/faq/index.html
index 73d87b70e86..685a2a67466 100644
--- a/user-guide/faq/index.html
+++ b/user-guide/faq/index.html
@@ -3,7 +3,7 @@
-
+
@@ -33,11 +33,11 @@
-
+
-
+
-
+
diff --git a/user-guide/index.html b/user-guide/index.html
index 51972b4947c..910f6adafd6 100644
--- a/user-guide/index.html
+++ b/user-guide/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/user-guide/new-users/index.html b/user-guide/new-users/index.html
index 5818b8e0f39..49037725485 100644
--- a/user-guide/new-users/index.html
+++ b/user-guide/new-users/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/user-guide/oql/index.html b/user-guide/oql/index.html
index 0df50d7c1c5..7e968b6f2ff 100644
--- a/user-guide/oql/index.html
+++ b/user-guide/oql/index.html
@@ -3,7 +3,7 @@
-
+
@@ -33,11 +33,11 @@
-
+
-
+
-
+
diff --git a/user-guide/overview/index.html b/user-guide/overview/index.html
index 8f1219f8f20..2990d1b8a28 100644
--- a/user-guide/overview/index.html
+++ b/user-guide/overview/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/using-the-dataset-validator/index.html b/using-the-dataset-validator/index.html
index 9d59b99cee1..5b70a900875 100644
--- a/using-the-dataset-validator/index.html
+++ b/using-the-dataset-validator/index.html
@@ -3,7 +3,7 @@
-
+
@@ -33,12 +33,12 @@
-
+
-
+
-
-
+
+
diff --git a/using-the-metaimport-script/index.html b/using-the-metaimport-script/index.html
index ded8020cc08..4c905847370 100644
--- a/using-the-metaimport-script/index.html
+++ b/using-the-metaimport-script/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+
diff --git a/web-api-and-clients/index.html b/web-api-and-clients/index.html
index 25c076b0ed6..854c1f2f37c 100644
--- a/web-api-and-clients/index.html
+++ b/web-api-and-clients/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,12 +31,12 @@
-
+
-
+
-
-
+
+
diff --git a/z-score-normalization-script/index.html b/z-score-normalization-script/index.html
index a4f7071f82a..d75047d3914 100644
--- a/z-score-normalization-script/index.html
+++ b/z-score-normalization-script/index.html
@@ -3,7 +3,7 @@
-
+
@@ -31,11 +31,11 @@
-
+
-
+
-
+