Skip to content

Commit

Permalink
update bundle version and lof info
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Jun 11, 2024
1 parent 84d6d43 commit 1958964
Show file tree
Hide file tree
Showing 16 changed files with 53 additions and 35 deletions.
6 changes: 3 additions & 3 deletions pcgr/pcgr_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pcgr._version import __version__

PCGR_VERSION = __version__
DB_VERSION = '20240530'
DB_VERSION = '20240610'

## MISCELLANEOUS
NCBI_BUILD_MAF = 'GRCh38'
Expand All @@ -12,13 +12,13 @@
RECOMMENDED_N_MUT_SIGNATURE = 200

## GENCODE
GENCODE_VERSION = {'grch38': 45,'grch37': 19}
GENCODE_VERSION = {'grch38': 46,'grch37': 19}

## vcfanno
VCFANNO_MAX_PROC = 15

## VEP settings/versions
VEP_VERSION = '111'
VEP_VERSION = '112'
VEP_ASSEMBLY = {'grch38': 'GRCh38','grch37': 'GRCh37'}
VEP_MIN_FORKS = 1
VEP_MAX_FORKS = 8
Expand Down
15 changes: 3 additions & 12 deletions pcgr/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
import csv
import gzip

from pcgr import annoutils, utils
from pcgr.annoutils import assign_cds_exon_intron_annotations
from pcgr import pcgr_vars
from pcgr.utils import getlogger


from pcgr.utils import getlogger, check_file_exists, get_perl_exports

def get_vep_command(file_paths, conf_options, input_vcf, output_vcf, debug = False):

Expand All @@ -21,14 +18,9 @@ def get_vep_command(file_paths, conf_options, input_vcf, output_vcf, debug = Fal
file_paths['refdata_assembly_dir'],
'misc','fasta','assembly',
f'Homo_sapiens.{pcgr_vars.VEP_ASSEMBLY[genome_assembly]}.dna.primary_assembly.fa.gz')
ancestor_assembly = os.path.join(
file_paths['refdata_assembly_dir'],
'misc','fasta','ancestor',
f'human_ancestor.fa.gz')

logger = getlogger('check-fasta-files')
utils.check_file_exists(fasta_assembly, logger = logger)
utils.check_file_exists(ancestor_assembly, logger = logger)
check_file_exists(fasta_assembly, logger = logger)

plugins_in_use = "NearestExonJB"

Expand Down Expand Up @@ -58,7 +50,7 @@ def get_vep_command(file_paths, conf_options, input_vcf, output_vcf, debug = Fal
gencode_set_in_use = "GENCODE - basic transcript set (--gencode_basic)"

# Compose full VEP command
vep_main_command = f'{utils.get_perl_exports()} && vep --input_file {input_vcf} --output_file {output_vcf} {vep_options}'
vep_main_command = f'{get_perl_exports()} && vep --input_file {input_vcf} --output_file {output_vcf} {vep_options}'
vep_bgzip_command = f'bgzip -f -c {output_vcf} > {output_vcf_gz}'
vep_tabix_command = f'tabix -f -p vcf {output_vcf_gz}'
if debug:
Expand All @@ -71,7 +63,6 @@ def get_vep_command(file_paths, conf_options, input_vcf, output_vcf, debug = Fal
vep_cmd['gencode_set_in_use'] = gencode_set_in_use
vep_cmd['plugins_in_use'] = plugins_in_use
vep_cmd['fasta_assembly'] = fasta_assembly
#vep_cmd['GENCODE_VERSION'] = 'release ' + str(pcgr_vars.GENCODE_VERSION[genome_assembly])

return(vep_cmd)

Expand Down
1 change: 0 additions & 1 deletion pcgrr/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ Package: pcgrr
Type: Package
Title: Personal Cancer Genome ReporteR
Version: 1.4.1.9010
Date: 2024-04-27
Authors@R:
c(person(given = "Sigve",
family = "Nakken",
Expand Down
56 changes: 42 additions & 14 deletions pcgrr/data-raw/data-raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ data_coltype_defs[['snv_indel_somatic_raw']] <- readr::cols_only(
CONSEQUENCE = readr::col_character(),
IMPACT = readr::col_character(),
LOSS_OF_FUNCTION = readr::col_logical(),
LOF_FILTER = readr::col_character(),
SPLICE_DONOR_RELEVANT = readr::col_logical(),
NULL_VARIANT = readr::col_logical(),
CODING_STATUS = readr::col_character(),
Expand All @@ -216,6 +217,7 @@ data_coltype_defs[['snv_indel_somatic_raw']] <- readr::cols_only(
HGVSc = readr::col_character(),
HGVSp = readr::col_character(),
CDS_CHANGE = readr::col_character(),
CDS_RELATIVE_POSITION = readr::col_character(),
EXON = readr::col_character(),
EXON_AFFECTED = readr::col_character(),
MUTATION_HOTSPOT = readr::col_character(),
Expand Down Expand Up @@ -306,6 +308,7 @@ data_coltype_defs[['snv_indel_germline_raw']] <- readr::cols_only(
CONSEQUENCE = readr::col_character(),
IMPACT = readr::col_character(),
LOSS_OF_FUNCTION = readr::col_logical(),
LOF_FILTER = readr::col_character(),
SPLICE_DONOR_RELEVANT = readr::col_logical(),
NULL_VARIANT = readr::col_logical(),
CODING_STATUS = readr::col_character(),
Expand All @@ -315,6 +318,7 @@ data_coltype_defs[['snv_indel_germline_raw']] <- readr::cols_only(
HGVSc = readr::col_character(),
HGVSp = readr::col_character(),
CDS_CHANGE = readr::col_character(),
CDS_RELATIVE_POSITION = readr::col_character(),
EXON = readr::col_character(),
EXON_AFFECTED = readr::col_integer(),
EXON_POSITION = readr::col_integer(),
Expand Down Expand Up @@ -487,6 +491,7 @@ tsv_cols[['snv_indel']] <-
'CONSEQUENCE',
'PFAM_DOMAIN_NAME',
'LOSS_OF_FUNCTION',
'LOF_FILTER',
'CDS_CHANGE',
'CODING_STATUS',
'EXONIC_STATUS',
Expand Down Expand Up @@ -647,6 +652,7 @@ dt_display[['snv_indel_gene_actionable']] <-
'HGVSp',
'PREDICTED_EFFECT',
'LOSS_OF_FUNCTION',
'LOF_FILTER',
'ONCOGENICITY',
'ONCOGENICITY_CODE',
'ONCOGENICITY_SCORE',
Expand Down Expand Up @@ -701,6 +707,8 @@ dt_display[['snv_indel_tier3']] <-
'HGVSc',
'HGVSp',
'MUTATION_HOTSPOT_CANCERTYPE',
'LOSS_OF_FUNCTION',
'LOF_FILTER',
'TCGA_FREQUENCY',
'PREDICTED_EFFECT',
'ONCOGENICITY_CODE',
Expand Down Expand Up @@ -739,6 +747,8 @@ dt_display[['tier4']] <-
'HGVSc',
'HGVSp',
'PREDICTED_EFFECT',
'LOSS_OF_FUNCTION',
'LOF_FILTER',
'REGULATORY_ANNOTATION',
'ONCOGENICITY_CODE',
'ONCOGENICITY_SCORE',
Expand Down Expand Up @@ -961,22 +971,40 @@ usethis::use_data(cosmic_sbs_signatures, overwrite = T)
#usethis::use_data(cosmic_sbs_signatures_all, overwrite = T)
#usethis::use_data(cosmic_sbs_signatures_no_artefacts, overwrite = T)

immune_celltypes <- as.data.frame(
immunedeconv::cell_type_map |>
dplyr::filter(method_dataset == "quantiseq") |>
dplyr::select(method_cell_type, cell_type) |>
dplyr::mutate(cell_type = dplyr::if_else(
!is.na(cell_type) &
cell_type == "uncharacterized cell",
"Uncharacterized cell",
as.character(cell_type)
)) |>
dplyr::mutate(cell_type = factor(
cell_type, levels = cell_type)) |>
dplyr::distinct()
# immune_celltypes <- as.data.frame(
# immunedeconv::cell_type_map |>
# dplyr::filter(method_dataset == "quantiseq") |>
# dplyr::select(method_cell_type, cell_type) |>
# dplyr::mutate(cell_type = dplyr::if_else(
# !is.na(cell_type) &
# cell_type == "uncharacterized cell",
# "Uncharacterized cell",
# as.character(cell_type)
# )) |>
# dplyr::mutate(cell_type = factor(
# cell_type, levels = cell_type)) |>
# dplyr::distinct()
# )

# Hard-coded lookup table with 11 rows pairing each `method_cell_type`
# identifier (e.g. "B.cells") with a human-readable `cell_type` label
# (e.g. "B cell"). The two vectors are matched by position, so they must
# stay in the same order and have the same length.
# NOTE(review): this looks like a static replacement for the commented-out
# immunedeconv::cell_type_map (quantiseq) derivation - confirm.
# NOTE(review): the object is named `immune_celltypes2`, whereas the nearby
# usethis::use_data() calls refer to `immune_celltypes` - confirm which name
# is meant to be saved as package data.
immune_celltypes2 <- data.frame(
method_cell_type = c("B.cells","Macrophages.M1",
"Macrophages.M2","Monocytes",
"Neutrophils","NK.cells",
"T.cells.CD4","T.cells.CD8",
"Tregs","Dendritic.cells",
"Other"),
cell_type = c("B cell","Macrophage M1",
"Macrophage M2","Monocyte",
"Neutrophil","NK cell",
"T cell CD4+ (non-regulatory)",
"T cell CD8+",
"T cell regulatory (Tregs)",
"Myeloid dendritic cell",
"Uncharacterized cell")
)

usethis::use_data(immune_celltypes, overwrite = T)

#usethis::use_data(immune_celltypes, overwrite = T)

germline_filter_levels <-
c("SOMATIC",
Expand Down
Binary file modified pcgrr/data/biomarker_evidence.rda
Binary file not shown.
Binary file modified pcgrr/data/cancer_phenotypes_regex.rda
Binary file not shown.
Binary file modified pcgrr/data/color_palette.rda
Binary file not shown.
Binary file modified pcgrr/data/cosmic_sbs_signatures.rda
Binary file not shown.
Binary file modified pcgrr/data/data_coltype_defs.rda
Binary file not shown.
Binary file modified pcgrr/data/dt_display.rda
Binary file not shown.
Binary file modified pcgrr/data/effect_prediction_algos.rda
Binary file not shown.
Binary file modified pcgrr/data/germline_filter_levels.rda
Binary file not shown.
Binary file modified pcgrr/data/tcga_cohorts.rda
Binary file not shown.
Binary file modified pcgrr/data/tsv_cols.rda
Binary file not shown.
Binary file modified pcgrr/data/variant_db_url.rda
Binary file not shown.
10 changes: 5 additions & 5 deletions pcgrr/vignettes/annotation_resources.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ output: rmarkdown::html_document
---

### Basic variant consequence annotation
* [VEP](http://www.ensembl.org/info/docs/tools/vep/index.html) - Variant Effect Predictor release 111 ([GENCODE v45](https://www.gencodegenes.org/human/) as gene reference database (v19 for grch37))
* [VEP](http://www.ensembl.org/info/docs/tools/vep/index.html) - Variant Effect Predictor release 112 ([GENCODE v46](https://www.gencodegenes.org/human/) as gene reference database (v19 for grch37))

### *In silico* predictions of the effect of coding variants
* [dbNSFP](https://sites.google.com/site/jpopgen/dbNSFP) - database of non-synonymous functional predictions (v4.4, May 2023)
* [dbNSFP](https://sites.google.com/site/jpopgen/dbNSFP) - database of non-synonymous functional predictions (v4.5, November 2023)

### Variant frequency databases
* [gnomAD](http://exac.broadinstitute.org/) - germline variant frequencies exome-wide (r2.1, October 2018)
Expand All @@ -16,12 +16,12 @@ output: rmarkdown::html_document
* [TCGA](https://portal.gdc.cancer.gov/) - somatic mutations discovered across 33 tumor type cohorts (release 39.0, December 2023)

### Variant databases of clinical utility
* [ClinVar](http://www.ncbi.nlm.nih.gov/clinvar/) - database of clinically related variants (May 2024)
* [CIViC](http://civic.genome.wustl.edu) - clinical interpretations of variants in cancer (May 23rd 2024)
* [ClinVar](http://www.ncbi.nlm.nih.gov/clinvar/) - database of clinically related variants (June 2024)
* [CIViC](http://civic.genome.wustl.edu) - clinical interpretations of variants in cancer (June 6th 2024)
* [CGI](http://www.cancergenomeinterpreter.org/biomarkers) - Cancer Genome Interpreter Cancer Biomarkers Database (CGI) (October 18th 2022)

### Protein domains/functional features
* [UniProt/SwissProt KnowledgeBase](http://www.uniprot.org) - resource on protein sequence and functional information (2024_02)
* [UniProt/SwissProt KnowledgeBase](http://www.uniprot.org) - resource on protein sequence and functional information (2024_03)
* [Pfam](http://pfam.xfam.org) - database of protein families and domains (v35.0, November 2021)

### Knowledge resources on gene and protein targets
Expand Down

0 comments on commit 1958964

Please sign in to comment.