Skip to content

Commit

Permalink
Merge pull request #236 from sigven/install_scripts
Browse files Browse the repository at this point in the history
Fix for "SyntaxWarning: invalid escape sequence"
  • Loading branch information
sigven authored Jun 13, 2024
2 parents 59a849d + d7c0b74 commit 8da9057
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
6 changes: 6 additions & 0 deletions install/0-download_vep_cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh
# Download and unpack the indexed Ensembl VEP cache for one genome assembly.
#
# Usage: sh 0-download_vep_cache.sh [GENOME] [VEP_VERSION]
#   GENOME       "GRCh38" (default) or "GRCh37"
#   VEP_VERSION  Ensembl/VEP release number (default: 112)
#
# The archive is fetched into, and extracted in, the current directory.

# Abort on the first failing command or unset variable, so a failed download
# is never followed by an attempt to extract a missing or partial archive.
set -eu

GENOME="${1:-GRCh38}" # or "GRCh37"
VEP_VERSION="${2:-112}"

# Reject anything but the two assemblies named above before touching the network.
case "${GENOME}" in
    GRCh37|GRCh38) ;;
    *)
        echo "Unsupported genome assembly: ${GENOME} (expected GRCh37 or GRCh38)" >&2
        exit 1
        ;;
esac

CACHE="homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz"

wget "https://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/indexed_vep_cache/${CACHE}"

# Let tar decompress the archive itself: a single command means a corrupt
# archive makes the script fail, instead of the error being hidden behind the
# exit status of the last stage of a `gzip -dc | tar` pipeline.
tar -xzvf "${CACHE}"
6 changes: 6 additions & 0 deletions install/1-download_pcgr_refdata.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh
# Download and unpack the PCGR reference data bundle for one genome assembly.
#
# Usage: sh 1-download_pcgr_refdata.sh [GENOME] [BUNDLE_VERSION]
#   GENOME          "grch38" (default) or "grch37" (lowercase)
#   BUNDLE_VERSION  bundle release date stamp (default: 20240610)
#
# The bundle is fetched into, and extracted in, the current directory.

# Abort on the first failing command or unset variable, so a failed download
# is never followed by an attempt to extract a missing or partial bundle.
set -eu

GENOME="${1:-grch38}" # or "grch37"
BUNDLE_VERSION="${2:-20240610}"

# Reject anything but the two assemblies named above before touching the network.
case "${GENOME}" in
    grch37|grch38) ;;
    *)
        echo "Unsupported genome assembly: ${GENOME} (expected grch37 or grch38)" >&2
        exit 1
        ;;
esac

BUNDLE="pcgr_ref_data.${BUNDLE_VERSION}.${GENOME}.tgz"

wget "https://insilico.hpc.uio.no/pcgr/${BUNDLE}"

# Let tar decompress the bundle itself: a single command means a corrupt
# bundle makes the script fail, instead of the error being hidden behind the
# exit status of the last stage of a `gzip -dc | tar` pipeline.
tar -xzvf "${BUNDLE}"
10 changes: 5 additions & 5 deletions scripts/cpsr_validate_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ def get_valid_custom_genelist(genelist_fname, genelist_bed_fname, refdata_assemb
logger.info('Creating BED file with custom target genes: ' + str(genelist_bed_fname))
id_pat = '|'.join([f"{g}" for g in valid_custom_identifiers])

id_pat_ext = id_pat + '|(\|tag\|)|' + 'ACMG_SF'
id_pat_ext = id_pat + '|(\\|tag\\|)|' + 'ACMG_SF'
awk_command = "awk 'BEGIN{FS=\"\\t\"}{if($4 !~ /ACMG_SF/ || ($4 ~ /ACMG_SF/ && $4 ~ /" + '|'.join(valid_custom_identifiers) + "/))print;}'"
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' > {genelist_bed_fname_unsorted}"
if gwas_findings == 0 and secondary_findings == 1:
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' | egrep -v '(\|tag\|)' > {genelist_bed_fname_unsorted}"
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' | egrep -v '(\\|tag\\|)' > {genelist_bed_fname_unsorted}"
if gwas_findings == 0 and secondary_findings == 0:
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' | egrep -v '(\|tag\|)' | {awk_command} > {genelist_bed_fname_unsorted}"
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' | egrep -v '(\\|tag\\|)' | {awk_command} > {genelist_bed_fname_unsorted}"
if gwas_findings == 1 and secondary_findings == 0:
cmd_target_regions_bed = f"bgzip -dc {virtualpanel_track_bed} | egrep '{id_pat_ext}' | {awk_command} > {genelist_bed_fname_unsorted}"

Expand Down Expand Up @@ -237,9 +237,9 @@ def simplify_vcf(input_vcf, validated_vcf, vcf, custom_bed, refdata_assembly_dir
## be part of the secondary findings list)
awk_command = "awk 'BEGIN{FS=\"\\t\"}{if($4 !~ /ACMG_SF/ || ($4 ~ /ACMG_SF/ && $4 ~ /" + str(ge_panel_identifier) + ":/))print;}'"
if gwas_findings == 0 and secondary_findings == 1:
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | egrep -v "(\|tag\|)" >> {virtual_panels_tmp_bed}', debug)
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | egrep -v "(\\|tag\\|)" >> {virtual_panels_tmp_bed}', debug)
elif gwas_findings == 0 and secondary_findings == 0:
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | egrep -v "(\|tag\|)" | {awk_command} >> {virtual_panels_tmp_bed}', debug)
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | egrep -v "(\\|tag\\|)" | {awk_command} >> {virtual_panels_tmp_bed}', debug)
elif gwas_findings == 1 and secondary_findings == 0:
check_subprocess(logger, f'bgzip -dc {target_bed_gz} | {awk_command} >> {virtual_panels_tmp_bed}', debug)
else:
Expand Down

0 comments on commit 8da9057

Please sign in to comment.