daisybio · nictru · Jan 24, 2024 · Nov 30, 2023 · Nov 30, 2023 · Dec 7, 2023
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ results/
 testing/
 testing*
 *.pyc
+*.swp
+runner/
diff --git a/.prettierignore b/.prettierignore
@@ -10,3 +10,4 @@ testing/
 testing*
 *.pyc
 bin/
+runner/
diff --git a/bin/ChromHMM.jar b/bin/ChromHMM.jar
diff --git a/bin/combine_tables.py b/bin/combine_tables.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import argparse
 import numpy as np

diff --git a/bin/get_chromhmm_results.py b/bin/get_chromhmm_results.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# coding: utf-8
+"""Turn ChromHMM output into a bed file of predicted enhancer regions."""
+
+import argparse
+import pandas as pd
+import numpy as np
+
+
+parser = argparse.ArgumentParser(description="Process ChromHMM output into bed file of predicted enhancers")
+
+parser.add_argument("-e", "--emissions", type=str, required=True, help="Path to emission file")
+parser.add_argument("-b", "--bed", type=str, required=True, help="Path to bed file")
+parser.add_argument("-t", "--threshold", type=float, required=False, default=0.9, help="Threshold for state emissions")
+parser.add_argument("-m", "--markers", nargs='+', required=False, default=["H3K27ac", "H3K4me3"], help="ChIP-Seq markers that indicate an enhancer")
+parser.add_argument("-o", "--output", type=str, required=True, help="Path to output bed with enhancer positions")
+
+args = parser.parse_args()
+
+
+# Read the emissions table (tab-separated, one column per marker)
+emissions = pd.read_csv(args.emissions, sep="\t").rename(columns={"State (Emission order)": "State"})
+
+# Report unknown markers through argparse instead of surfacing a bare KeyError
+missing = [marker for marker in args.markers if marker not in emissions.columns]
+if missing:
+    parser.error("Markers not found in emissions file: " + ", ".join(missing))
+
+
+# Read input bed file (its first line is skipped) and remove unnecessary columns
+bed = pd.read_csv(args.bed,
+                  sep="\t",
+                  skiprows=1,
+                  names=["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"]
+                 ).drop(columns=["strand", "score", "start_1", "end_1", "rgb"])
+
+
+# Keep state if any of the markers is enriched >= threshold for this state
+enriched = emissions[args.markers].ge(args.threshold).any(axis=1)
+states = emissions.loc[enriched, "State"].tolist()
+
+
+# Subset bed file for selected states
+out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"])
+
+# Write output as a tab-separated bed file without header or index
+out_bed.to_csv(args.output, index=False, sep="\t", header=False)
diff --git a/bin/make_cellmarkfiletable.py b/bin/make_cellmarkfiletable.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# coding: utf-8
+"""Rewrite a ChromHMM cellmarkfiletable so its file columns hold base names only."""
+
+import os
+import argparse
+import pandas as pd
+
+
+# Creates a cellmarkfiletable which is needed as input for ChromHMM
+parser = argparse.ArgumentParser(description = "Script to remove full paths of input file to fit into nextflow workflow")
+parser.add_argument("--input", help = "Path to input table", required = True, type = str)
+parser.add_argument("--output", help = "path for output file", required = True, type = str)
+args = parser.parse_args()
+
+# Headerless, tab-separated input; rows with fewer fields get NaN in the trailing columns
+table = pd.read_csv(args.input, sep = "\t", names=["state", "assay", "bam", "control"])
+
+# Strip the directory part from both file columns. na_action="ignore" passes
+# missing entries through instead of failing inside os.path.basename.
+for column in ("bam", "control"):
+    table[column] = table[column].map(os.path.basename, na_action="ignore")
+table.to_csv(args.output, header=False, sep="\t", index=False)
diff --git a/bin/reformat_bam.sh b/bin/reformat_bam.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Reheaders a bam file and adds 'chr' to each chromosome
+# $1 is the input bam
+# $2 is the output bam
+#
+# Only @SQ header lines are rewritten, so "SN:" text elsewhere in the header
+# (for example in @PG or @CO lines) is left untouched.
+#   SN:<1-9|X|Y>...  ->  SN:chr<1-9|X|Y>...   (prefix match, so 10-22 are covered)
+#   SN:MT...         ->  SN:chrM...
+
+# Exit non-zero as soon as any command fails, including commands inside the
+# pipeline, and treat a missing $1/$2 as an error instead of an empty string.
+set -euo pipefail
+
+# Paths are quoted so that file names containing spaces survive word splitting.
+samtools view -H "$1" | \
+   sed -e '/^@SQ/s/SN:\([1-9XY]\)/SN:chr\1/' \
+       -e '/^@SQ/s/SN:MT/SN:chrM/' | \
+   samtools reheader - "$1" > "$2"
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,5 @@ results/ @@
     testing/
     testing*
     *.pyc
+    *.swp
+    runner/