Added ChromHMM GET_OUTPUT

daisybio · Dec 7, 2023 · 60a6bc9 · 60a6bc9
1 parent 8820918
commit 60a6bc9
Show file tree

Hide file tree

Showing 7 changed files with 116 additions and 8 deletions.
diff --git a/bin/get_chromhmm_results.py b/bin/get_chromhmm_results.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import argparse
+import pandas as pd
+import numpy as np
+
+
+parser = argparse.ArgumentParser(description="Process ChromHMM output into bed file of predicted enhancers")
+
+parser.add_argument("-e", "--emissions", type=str, required=True, help="Path to emission file")
+parser.add_argument("-b", "--bed", type=str, required=True, help="Path to bed file")
+parser.add_argument("-t", "--threshold", type=float, required=False, default=0.9, help="Threshold for state emissions")
+parser.add_argument("-m", "--markers", nargs='+', required=False, default=["H3K27ac", "H3K4me3"], help="ChIP-Seq markers that indicate an enhancer")
+parser.add_argument("-o", "--output", type=str, required=True, help="Path to output bed with enhancer positions")
+
+args = parser.parse_args()
+
+path_emissions = args.emissions
+path_bed = args.bed
+threshold = args.threshold
+markers = args.markers
+output = args.output
+
+
+# Read emissions file for the provided markers
+emissions = pd.read_csv(path_emissions, sep = "\t")[["State (Emission order)"] + markers].rename(columns={"State (Emission order)": "State"})
+
+
+# Read input bed file and remove unecessary columns
+bed = pd.read_csv(path_bed,
+                  sep="\t",
+                  skiprows=1,
+                  names=["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"]
+                 ).drop(columns=["strand", "score", "start_1", "end_1", "rgb"])
+
+
+# Keep state if any of the markers is enriched > threshold for this state
+states = emissions[np.any([emissions[marker] >= threshold for marker in markers], axis=0)]["State"].tolist()
+
+
+# Subset bed file for selected states
+out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])
+
+# Write output
+out_bed.to_csv(output, index=False, sep="\t", header=False)
+
diff --git a/modules/local/chromhmm/binarize_bams.nf b/modules/local/chromhmm/binarize_bams.nf
@@ -5,19 +5,25 @@ process BINARIZE_BAMS {
     input:
     path cellmarkfiletable
     path bams, stageAs: "reformatted_bams/*"
+	val organism
 
     output:
     path "binarized_bams"
 
     script:
     """
-	//TODO: Get parameters for organism
-	
 	java -jar ${projectDir}/assets/ChromHMM.jar BinarizeBam \
-		${projectDir}/assets/CHROMSIZES/mm10.txt \
+		${projectDir}/assets/CHROMSIZES/"${organism}".txt \
 		reformatted_bams \
 		$cellmarkfiletable \
 		binarized_bams
     
 	"""
+
+	stub:
+	"""
+	mkdir binarized_bams
+	touch binarized_bams/chr1.txt
+	touch binarized_bams/chr2.txt
+	"""
 }
diff --git a/modules/local/chromhmm/get_output.nf b/modules/local/chromhmm/get_output.nf
@@ -0,0 +1,23 @@
+process GET_OUTPUT {
+
+    container "quay.io/biocontainers/pandas:1.4.3"
+
+    input:
+    tuple path(emissions), path(bed)
+
+    output:
+	path "enhancers_${bed.baseName.split('_')[0]}.bed"
+
+	script:
+	"""
+	get_chromhmm_results.py \
+		--emissions $emissions \
+		--bed $bed \
+		--output enhancers_${bed.baseName.split('_')[0]}.bed
+	"""
+
+	stub:
+	"""
+	touch enhancers_${bed.baseName.split('_')[0]}.bed
+	"""
+}
diff --git a/modules/local/chromhmm/learn_model.nf b/modules/local/chromhmm/learn_model.nf
@@ -4,19 +4,33 @@ process LEARN_MODEL {
 
     input:
     path binarized_bams
+    val states
+    val organism
 
     output:
-    path "ChromHMM_output"
+    path "ChromHMM_output/emissions_${states}.txt", emit: emissions
+	path "ChromHMM_output/*_${states}_dense.bed", emit: beds
 
     script:
     """
     //TODO: Check for parameters for the number of states (default 10) and organism
 
 	java -jar ${projectDir}/assets/ChromHMM.jar LearnModel \
+        -p $task.cpus \
 		$binarized_bams \
 		ChromHMM_output \
-		10 \
-		mm10
+		$states \
+		$organism
     """
+
+    stub:
+	"""
+	mkdir ChromHMM_output
+	touch ChromHMM_output/emissions_${states}.txt
+	touch ChromHMM_output/L1_${states}_dense.bed
+	touch ChromHMM_output/L10_${states}_dense.bed
+	touch ChromHMM_output/P6_${states}_dense.bed
+	touch ChromHMM_output/P13_${states}_dense.bed
+	"""
 }
 
diff --git a/modules/local/chromhmm/make_cellmarkfiletable.nf b/modules/local/chromhmm/make_cellmarkfiletable.nf
@@ -12,4 +12,9 @@ process MAKE_CELLMARKFILETABLE {
     """
     make_cellmarkfiletable.py --input_dir $bamDirectory --output cellmarkfiletable.txt
     """
+
+    stub:
+	"""
+	touch cellmarkfiletable.txt
+	"""
 }
diff --git a/modules/local/chromhmm/reformat_bam.nf b/modules/local/chromhmm/reformat_bam.nf
@@ -12,5 +12,10 @@ process REFORMAT_BAM {
     """
     reformat_bam.sh $bamFileIn ${bamFileIn.baseName}_reformatted.bam
     """
+
+	stub:
+	"""
+	touch ${bamFileIn.baseName}_reformatted.bam
+	"""
 }
 
diff --git a/subworkflows/local/chromhmm.nf b/subworkflows/local/chromhmm.nf
@@ -3,12 +3,16 @@ include { REFORMAT_BAM } from "../../modules/local/chromhmm/reformat_bam"
 include { SAMTOOLS_INDEX as INDEX_BAM } from "../../modules/nf-core/samtools/index/main"
 include { BINARIZE_BAMS } from "../../modules/local/chromhmm/binarize_bams"
 include { LEARN_MODEL } from "../../modules/local/chromhmm/learn_model"
+include { GET_OUTPUT } from "../../modules/local/chromhmm/get_output"
 
 
 
 workflow CHROMHMM {
 	take:
 		raw_bams
+		chromhmm_states // default to 10
+		organism // mm10
+
 
 	main:
 		ch_bams_raw = Channel.fromPath("${raw_bams}/*/*.bam")
@@ -19,7 +23,11 @@ workflow CHROMHMM {
 
     	INDEX_BAM(REFORMAT_BAM.out)
 
-    	BINARIZE_BAMS(MAKE_CELLMARKFILETABLE.out, INDEX_BAM.out.bai.collect())
+    	BINARIZE_BAMS(MAKE_CELLMARKFILETABLE.out, INDEX_BAM.out.bai.collect(), organism)
+
+    	LEARN_MODEL(BINARIZE_BAMS.out, chromhmm_states, organism)
+
+		ch_emission_bed = LEARN_MODEL.out.emissions.combine(LEARN_MODEL.out.beds.flatten())
 
-    	LEARN_MODEL(BINARIZE_BAMS.out)
+		GET_OUTPUT(ch_emission_bed)
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,4 +12,9 @@ process MAKE_CELLMARKFILETABLE { @@
         """
         make_cellmarkfiletable.py --input_dir $bamDirectory --output cellmarkfiletable.txt
         """
+        stub:
+    	"""
+    	touch cellmarkfiletable.txt
+    	"""
     }