From 91dad047cb7050df8453931c111587d89b233912 Mon Sep 17 00:00:00 2001
From: Lydia Buntrock <lydia.buntrock@fu-berlin.de>
Date: Tue, 2 Aug 2022 16:52:52 +0200
Subject: [PATCH] [MISC] Create conda samtools and simulation environments

Signed-off-by: Lydia Buntrock <lydia.buntrock@fu-berlin.de>
---
 .../workflow/rules/callers.smk                         |  2 ++
 .../workflow/rules/callers.smk                         |  2 ++
 test/benchmark/envs/environment.yml                    |  2 --
 test/benchmark/envs/samtools.yaml                      |  5 +++--
 test/benchmark/envs/simulation.yaml                    | 10 ++++++++++
 test/benchmark/prepare_BAM_with_crossmap.sh            |  5 ++++-
 test/benchmark/prepare_truth_set_with_NCBI.sh          |  2 --
 7 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 test/benchmark/envs/simulation.yaml

diff --git a/test/benchmark/caller_comparison_iGenVar_only/workflow/rules/callers.smk b/test/benchmark/caller_comparison_iGenVar_only/workflow/rules/callers.smk
index fb15e0d0..42c4502d 100644
--- a/test/benchmark/caller_comparison_iGenVar_only/workflow/rules/callers.smk
+++ b/test/benchmark/caller_comparison_iGenVar_only/workflow/rules/callers.smk
@@ -105,5 +105,7 @@ rule picard:
         vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.vcf"
     log:
         "logs/caller_comparison_iGenVar_only/picard_output.{input_combination}.log"
+    conda:
+        "../../../envs/simulation.yaml"
     shell:
         "picard SortVcf -I {input.vcf} -O {output.vcf} -Xms1g -Xmx100g --TMP_DIR tmp/picard/ &>> {log}"
diff --git a/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk b/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk
index c4c0c270..51c863ba 100644
--- a/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk
+++ b/test/benchmark/caller_comparison_long_read/workflow/rules/callers.smk
@@ -84,6 +84,8 @@ rule picard:
         vcf = "results/caller_comparison_long_read/{dataset}/SVIM/variants.vcf"
     log:
         "logs/caller_comparison_long_read/SVIM/picard_output.{dataset}.log"
+    conda:
+        "../../../envs/simulation.yaml"
     shell:
         "picard SortVcf -I {input.vcf} -O {output.vcf} -Xms1g -Xmx100g --TMP_DIR tmp/picard/ &>> {log}"
         # The Xms and Xmx sets the java memory for avoiding "java.lang.OutOfMemoryError: GC overhead limit exceeded"
diff --git a/test/benchmark/envs/environment.yml b/test/benchmark/envs/environment.yml
index 151d5667..ae579eed 100644
--- a/test/benchmark/envs/environment.yml
+++ b/test/benchmark/envs/environment.yml
@@ -4,7 +4,6 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  # - bedops           # for vcf2bed
   - bgzip            # rule tabix - accepts only zipped files zipped with bgzip
   - crossmap         # for coordinate_conversion.sh
   - delly=1.0.3
@@ -12,7 +11,6 @@ dependencies:
   - mamba
   - matplotlib       # rule run_svim
   - pbsv=2.6.2
-  - picard           # rule picard
   - pip
   - python=3.6
   - r
diff --git a/test/benchmark/envs/samtools.yaml b/test/benchmark/envs/samtools.yaml
index 1e13e14f..97f605a3 100644
--- a/test/benchmark/envs/samtools.yaml
+++ b/test/benchmark/envs/samtools.yaml
@@ -1,5 +1,6 @@
+name: samtools
 channels:
  - bioconda
-#Sorting BAM files from NGMLR fails in samtools version>=1.10, therefore fix to version 1.9
+# Sorting BAM files from NGMLR fails in samtools>=1.10, which is why the version was pinned to 1.9; the pin below is currently disabled.
 dependencies:
- - samtools=1.9
+ - samtools #=1.9
diff --git a/test/benchmark/envs/simulation.yaml b/test/benchmark/envs/simulation.yaml
new file mode 100644
index 00000000..9357b6c1
--- /dev/null
+++ b/test/benchmark/envs/simulation.yaml
@@ -0,0 +1,10 @@
+name: simulation  # conda env bundling the read simulation, alignment and VCF sorting tools listed below
+channels:  # NOTE(review): listed first = highest priority; bioconda docs recommend conda-forge before bioconda before defaults - confirm order
+  - defaults
+  - conda-forge
+  - bioconda
+dependencies:
+  - bwa>=0.7  # for aligning the simulated reads
+  - mamba     # NOTE(review): why mamba is needed inside this env is not evident here - confirm
+  - mason     # for simulating Illumina short reads
+  - picard    # for sorting VCF files (used by the picard rules via the conda directive)
diff --git a/test/benchmark/prepare_BAM_with_crossmap.sh b/test/benchmark/prepare_BAM_with_crossmap.sh
index 6946bf57..b216acb4 100644
--- a/test/benchmark/prepare_BAM_with_crossmap.sh
+++ b/test/benchmark/prepare_BAM_with_crossmap.sh
@@ -12,6 +12,9 @@ wget --retry-connrefused --waitretry=30 --read-timeout=30 --timeout=30 --tries=2
 
 echo "$(tput setaf 1)$(tput setab 7)------- CrossMap installed and prepared (5.1/9.6) --------$(tput sgr 0)" 1>&3
 
+conda env create -f Repos/iGenVar/test/benchmark/envs/samtools.yaml
+conda activate samtools
+
 # Illumina Mate Pair
 ## prepare reference file:
 ## reorder reference:
@@ -78,6 +81,6 @@ CrossMap.py bam hg19ToHg38.over.chain.gz long_reads/GRCh37/NA24385_phased_possor
     long_reads/GRCh38/NA24385_phased_possorted_bam.Hg38.bam
 CrossMap.py bam hg19ToHg38.over.chain.gz long_reads/GRCh37/NA24385_phased_possorted_bam.md.bam \
     long_reads/GRCh38/NA24385_phased_possorted_bam.md.Hg38.bam
-conda activate benchmarks
+conda activate iGenVar_benchmark
 
 echo "$(tput setaf 1)$(tput setab 7)------- BAM files prepared (5.3/9.6) --------$(tput sgr 0)" 1>&3
diff --git a/test/benchmark/prepare_truth_set_with_NCBI.sh b/test/benchmark/prepare_truth_set_with_NCBI.sh
index e652e24a..7fac7ecd 100644
--- a/test/benchmark/prepare_truth_set_with_NCBI.sh
+++ b/test/benchmark/prepare_truth_set_with_NCBI.sh
@@ -74,6 +74,4 @@ CrossMap.py bed hg19ToHg38.over.chain.gz truth_set/${TRUTH_SET}.renamed_chr.bed
 # remove chr from chromosome names again
 sed -e 's!chr!!' truth_set/${TRUTH_SET}.renamed_chr.Hg38.bed > truth_set/${TRUTH_SET}.Hg38.bed
 
-# convert2bed --input=vcf --insertions < NA24385.GRCh38.large_svs.vcf.gz > insertions.bed
-
 echo "$(tput setaf 1)$(tput setab 7)------- truth set files prepared (5.6/9.6) --------$(tput sgr 0)" 1>&3