-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8820918
commit 60a6bc9
Showing
7 changed files
with
116 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env python
# coding: utf-8
"""Turn ChromHMM output into a bed file of predicted enhancers.

A chromatin state is kept when at least one of the requested ChIP-Seq markers
has an emission value greater than or equal to the threshold for that state.
Every interval of the input bed file annotated with a kept state is written to
the output as a three-column (chr, start, end), tab-separated record.
"""

import argparse
import pandas as pd
import numpy as np


# Header of the state column in the emissions table.
STATE_COLUMN = "State (Emission order)"

# Names assigned to the nine columns of the input bed file.
BED_COLUMNS = ["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"]

# Bed columns that are dropped right after reading.
UNUSED_BED_COLUMNS = ["strand", "score", "start_1", "end_1", "rgb"]


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser(description="Process ChromHMM output into bed file of predicted enhancers")

    parser.add_argument("-e", "--emissions", type=str, required=True, help="Path to emission file")
    parser.add_argument("-b", "--bed", type=str, required=True, help="Path to bed file")
    parser.add_argument("-t", "--threshold", type=float, required=False, default=0.9, help="Threshold for state emissions")
    parser.add_argument("-m", "--markers", nargs='+', required=False, default=["H3K27ac", "H3K4me3"], help="ChIP-Seq markers that indicate an enhancer")
    parser.add_argument("-o", "--output", type=str, required=True, help="Path to output bed with enhancer positions")

    return parser.parse_args(argv)


def _read_emissions(path_emissions, markers):
    """Read the emissions table, keeping only the state and marker columns.

    The state column is renamed to ``State``. pandas raises ``KeyError`` when
    the state column or any requested marker is absent from the file.
    """
    emissions = pd.read_csv(path_emissions, sep="\t")
    return emissions[[STATE_COLUMN] + markers].rename(columns={STATE_COLUMN: "State"})


def _read_bed(path_bed):
    """Read the input bed file and remove the unnecessary columns.

    The first line of the file is always skipped, i.e. the file is expected to
    start with a single header line (such as a ``track`` line).
    """
    bed = pd.read_csv(path_bed, sep="\t", skiprows=1, names=BED_COLUMNS)
    return bed.drop(columns=UNUSED_BED_COLUMNS)


def _select_states(emissions, markers, threshold):
    """Return the states where any marker's emission is >= *threshold*."""
    enriched = (emissions[markers] >= threshold).any(axis=1)
    return emissions.loc[enriched, "State"].tolist()


def main(argv=None):
    """Run the conversion from ChromHMM output to an enhancer bed file.

    Args:
        argv: Optional list of command-line arguments; when None the
            arguments are taken from ``sys.argv``.

    Raises:
        KeyError: If the emissions file lacks the state column or one of the
            requested markers.
    """
    args = _parse_args(argv)

    emissions = _read_emissions(args.emissions, args.markers)
    bed = _read_bed(args.bed)

    # Keep a state if any of the markers is enriched >= threshold for it
    states = _select_states(emissions, args.markers, args.threshold)

    # Subset bed file for selected states
    out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])

    # Write output
    out_bed.to_csv(args.output, index=False, sep="\t", header=False)


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// GET_OUTPUT: runs get_chromhmm_results.py on one (emissions, bed) pair and
// emits a single bed file named enhancers_<prefix>.bed, where <prefix> is the
// part of the input bed file's base name before the first underscore.
process GET_OUTPUT { | ||
|
||
// Container image the task runs in.
container "quay.io/biocontainers/pandas:1.4.3" | ||
|
||
input: | ||
// One emissions file and one bed file, received together as a tuple.
tuple path(emissions), path(bed) | ||
|
||
output: | ||
// Must match the file name passed to --output in the script block and the
// file touched in the stub block; the same expression is repeated in all three.
path "enhancers_${bed.baseName.split('_')[0]}.bed" | ||
|
||
// Only --emissions, --bed and --output are passed; no other options of
// get_chromhmm_results.py are set here.
script: | ||
""" | ||
get_chromhmm_results.py \ | ||
--emissions $emissions \ | ||
--bed $bed \ | ||
--output enhancers_${bed.baseName.split('_')[0]}.bed | ||
""" | ||
|
||
// Stub variant: creates an empty file with the expected output name instead
// of running the script.
stub: | ||
""" | ||
touch enhancers_${bed.baseName.split('_')[0]}.bed | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters