Skip to content

Commit

Permalink
Added ChromHMM GET_OUTPUT
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonHafner committed Dec 7, 2023
1 parent 8820918 commit 60a6bc9
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 8 deletions.
47 changes: 47 additions & 0 deletions bin/get_chromhmm_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python
# coding: utf-8

import argparse
import pandas as pd
import numpy as np


parser = argparse.ArgumentParser(description="Process ChromHMM output into bed file of predicted enhancers")

parser.add_argument("-e", "--emissions", type=str, required=True, help="Path to emission file")
parser.add_argument("-b", "--bed", type=str, required=True, help="Path to bed file")
parser.add_argument("-t", "--threshold", type=float, required=False, default=0.9, help="Threshold for state emissions")
parser.add_argument("-m", "--markers", nargs='+', required=False, default=["H3K27ac", "H3K4me3"], help="ChIP-Seq markers that indicate an enhancer")
parser.add_argument("-o", "--output", type=str, required=True, help="Path to output bed with enhancer positions")

args = parser.parse_args()

path_emissions = args.emissions
path_bed = args.bed
threshold = args.threshold
markers = args.markers
output = args.output


# Read emissions file for the provided markers
emissions = pd.read_csv(path_emissions, sep = "\t")[["State (Emission order)"] + markers].rename(columns={"State (Emission order)": "State"})


# Read input bed file and remove unecessary columns
bed = pd.read_csv(path_bed,
sep="\t",
skiprows=1,
names=["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"]
).drop(columns=["strand", "score", "start_1", "end_1", "rgb"])


# Keep state if any of the markers is enriched > threshold for this state
states = emissions[np.any([emissions[marker] >= threshold for marker in markers], axis=0)]["State"].tolist()


# Subset bed file for selected states
out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])

# Write output
out_bed.to_csv(output, index=False, sep="\t", header=False)

12 changes: 9 additions & 3 deletions modules/local/chromhmm/binarize_bams.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,25 @@ process BINARIZE_BAMS {
input:
path cellmarkfiletable
path bams, stageAs: "reformatted_bams/*"
val organism

output:
path "binarized_bams"

script:
"""
//TODO: Get parameters for organism
java -jar ${projectDir}/assets/ChromHMM.jar BinarizeBam \
${projectDir}/assets/CHROMSIZES/mm10.txt \
${projectDir}/assets/CHROMSIZES/"${organism}".txt \
reformatted_bams \
$cellmarkfiletable \
binarized_bams
"""

stub:
"""
mkdir binarized_bams
touch binarized_bams/chr1.txt
touch binarized_bams/chr2.txt
"""
}
23 changes: 23 additions & 0 deletions modules/local/chromhmm/get_output.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
process GET_OUTPUT {

container "quay.io/biocontainers/pandas:1.4.3"

input:
tuple path(emissions), path(bed)

output:
path "enhancers_${bed.baseName.split('_')[0]}.bed"

script:
"""
get_chromhmm_results.py \
--emissions $emissions \
--bed $bed \
--output enhancers_${bed.baseName.split('_')[0]}.bed
"""

stub:
"""
touch enhancers_${bed.baseName.split('_')[0]}.bed
"""
}
20 changes: 17 additions & 3 deletions modules/local/chromhmm/learn_model.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,33 @@ process LEARN_MODEL {

input:
path binarized_bams
val states
val organism

output:
path "ChromHMM_output"
path "ChromHMM_output/emissions_${states}.txt", emit: emissions
path "ChromHMM_output/*_${states}_dense.bed", emit: beds

script:
"""
//TODO: Check for parameters for the number of states (default 10) and organism
java -jar ${projectDir}/assets/ChromHMM.jar LearnModel \
-p $task.cpus \
$binarized_bams \
ChromHMM_output \
10 \
mm10
$states \
$organism
"""

stub:
"""
mkdir ChromHMM_output
touch ChromHMM_output/emissions_${states}.txt
touch ChromHMM_output/L1_${states}_dense.bed
touch ChromHMM_output/L10_${states}_dense.bed
touch ChromHMM_output/P6_${states}_dense.bed
touch ChromHMM_output/P13_${states}_dense.bed
"""
}

5 changes: 5 additions & 0 deletions modules/local/chromhmm/make_cellmarkfiletable.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,9 @@ process MAKE_CELLMARKFILETABLE {
"""
make_cellmarkfiletable.py --input_dir $bamDirectory --output cellmarkfiletable.txt
"""

stub:
"""
touch cellmarkfiletable.txt
"""
}
5 changes: 5 additions & 0 deletions modules/local/chromhmm/reformat_bam.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,10 @@ process REFORMAT_BAM {
"""
reformat_bam.sh $bamFileIn ${bamFileIn.baseName}_reformatted.bam
"""

stub:
"""
touch ${bamFileIn.baseName}_reformatted.bam
"""
}

12 changes: 10 additions & 2 deletions subworkflows/local/chromhmm.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ include { REFORMAT_BAM } from "../../modules/local/chromhmm/reformat_bam"
include { SAMTOOLS_INDEX as INDEX_BAM } from "../../modules/nf-core/samtools/index/main"
include { BINARIZE_BAMS } from "../../modules/local/chromhmm/binarize_bams"
include { LEARN_MODEL } from "../../modules/local/chromhmm/learn_model"
include { GET_OUTPUT } from "../../modules/local/chromhmm/get_output"



workflow CHROMHMM {
take:
raw_bams
chromhmm_states // default to 10
organism // mm10


main:
ch_bams_raw = Channel.fromPath("${raw_bams}/*/*.bam")
Expand All @@ -19,7 +23,11 @@ workflow CHROMHMM {

INDEX_BAM(REFORMAT_BAM.out)

BINARIZE_BAMS(MAKE_CELLMARKFILETABLE.out, INDEX_BAM.out.bai.collect())
BINARIZE_BAMS(MAKE_CELLMARKFILETABLE.out, INDEX_BAM.out.bai.collect(), organism)

LEARN_MODEL(BINARIZE_BAMS.out, chromhmm_states, organism)

ch_emission_bed = LEARN_MODEL.out.emissions.combine(LEARN_MODEL.out.beds.flatten())

LEARN_MODEL(BINARIZE_BAMS.out)
GET_OUTPUT(ch_emission_bed)
}

0 comments on commit 60a6bc9

Please sign in to comment.