Skip to content

Commit

Permalink
finished novel RNA detection algorithm and also made changes in how
Browse files Browse the repository at this point in the history
workflow was being called
  • Loading branch information
mshakya committed Jul 19, 2018
1 parent 80107ed commit abe75bc
Show file tree
Hide file tree
Showing 13 changed files with 796 additions and 621 deletions.
383 changes: 134 additions & 249 deletions bin/runPiReT

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions pypiret/Checks/GFF3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""Check design."""

import sys
import os
import re
import collections
import pandas as pd
Expand All @@ -13,9 +12,9 @@ class CheckGFF():

def __init__(self, gff_file):
"""Initialize."""
self.gff_col = pd.read_table(gff_file, sep='\t', comment = "#")
self.gff_col = pd.read_table(gff_file, sep='\t', comment="#")
self.gff_col.columns = ["seqid", "source", "type", "start", "end",
"score", "strand", "phase", "attributes"]
"score", "strand", "phase", "attributes"]
def size(self):
"""Check if gff file has 9 columns."""
if len(self.gff_col.columns) != 9:
Expand Down
58 changes: 53 additions & 5 deletions pypiret/Runs/DGE.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,17 @@ def summ_summ(self):
"""Summarize the summary table to be displayed in edge"""
edger_dir = self.workdir + "/edgeR/" + self.kingdom
all_files = os.listdir(edger_dir)
out_file = os.path.join(edger_dir, "summary_updown.csv")
summ_files = [pd.read_csv(os.path.join(edger_dir, file),
if all_files:
out_file = os.path.join(edger_dir, "summary_updown.csv")
summ_files = [pd.read_csv(os.path.join(edger_dir, file),
index_col=0) for file in all_files if "summary.csv" in file ]
summ_df = pd.concat(summ_files)
summ_df.to_csv(out_file)
summ_df = pd.concat(summ_files)
summ_df.to_csv(out_file)


@requires(Summ.FeatureCounts)
class DESeq2(luigi.Task):
"""Find DGE using edgeR."""
"""Find DGE using DESeq2."""

exp_design = luigi.Parameter()
p_value = luigi.FloatParameter()
Expand Down Expand Up @@ -117,3 +118,50 @@ def program_environment(self):
"""Environmental variables for this program."""
scriptdir = os.path.join(self.bindir, "/../scripts/")
return {'PATH': scriptdir + ":" + os.environ["PATH"]}


@inherits(Summ.ReStringTieScoresW)
class ballgown(luigi.Task):
    """Find DGE using ballgown.

    Runs ``scripts/ballgown.R`` (resolved relative to ``bindir``) once per
    feature level ("gene" and "transcript") and writes the results under
    ``<workdir>/bg_results/<kingdom>``.

    ``workdir`` and ``kingdom`` are not declared here; presumably they are
    provided through ``@inherits(Summ.ReStringTieScoresW)`` -- confirm
    against that task's parameters.
    """

    # Experimental design file, handed to ballgown.R via ``-e``.
    exp_design = luigi.Parameter()
    # P-value cutoff, handed to ballgown.R via ``-p``.
    p_value = luigi.FloatParameter()
    # Directory used to locate ``../scripts/ballgown.R``.
    bindir = luigi.Parameter()

    def output(self):
        """Expected output of DGE using ballgown (the results directory)."""
        # NOTE(review): the target is the directory itself, and run() creates
        # that directory before invoking Rscript. A failed R run may therefore
        # leave the target in place -- confirm whether a per-file target would
        # be more appropriate.
        bg_rdir = os.path.join(self.workdir, "bg_results", self.kingdom)
        return LocalTarget(bg_rdir)

    def run(self):
        """Run ballgown at the gene and transcript level."""
        # Equivalent command line:
        # Rscript scripts/ballgown.R -i tests/test_euk/ballgown/
        #         -e test_euk.txt -o test_ballgown -n exon
        bg_dir = os.path.join(self.workdir, "ballgown", self.kingdom)
        bg_results = os.path.join(self.workdir, "bg_results", self.kingdom)
        if not os.path.isdir(bg_results):
            os.makedirs(bg_results)
        bg_loc = os.path.join(self.bindir, "../scripts/ballgown.R")

        for name in ("gene", "transcript"):
            bg_list = [bg_loc, "-i", bg_dir, "-e", self.exp_design,
                       "-n", name,
                       # argv entries are strings; p_value is a float.
                       "-p", str(self.p_value),
                       "-o", bg_results]
            bg_cmd = Rscript[bg_list]
            bg_cmd()

        # TODO: no summary-table step (cf. summ_summ in the other DGE tasks)
        # is implemented for ballgown yet.

    def program_environment(self):
        """Environmental variables for this program.

        Returns a dict whose ``PATH`` has the scripts directory prepended.
        """
        # The second component must be relative: os.path.join discards every
        # earlier component when a later one is absolute, which would drop
        # ``bindir`` and yield "/../scripts/".
        scriptdir = os.path.join(self.bindir, "../scripts/")
        return {'PATH': scriptdir + ":" + os.environ["PATH"]}
31 changes: 15 additions & 16 deletions pypiret/Runs/Map.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ def output(self):
def run(self):
"""Run stringtie."""
stringtie_opt = ["-o", self.out_gtf,
"-p", self.num_cpus,
"-G", self.gff_file,
"-C", self.out_cover,
"-A", self.out_abun,
Expand All @@ -513,10 +514,9 @@ def run(self):
stringtie_cmd()


# @inherits(GFF2GTF)
@inherits(HisatMapW)
class StringTieScoresW(luigi.WrapperTask):
"""From Mapping to Counting step for Eukaryotic reference."""
"""Wrapper function for stringtie in all samples"""

gff_file = Parameter()
kingdom = Parameter()
Expand All @@ -542,7 +542,6 @@ def requires(self):
apd = '_prok'
elif self.kingdom == 'eukarya':
apd = '_euk'
gff_name = self.gff_file.split(".gff")[0].split("/")[-1]

yield StringTieScores(fastqs=[trim_dir + "/" + samp + ".1.trimmed.fastq",
trim_dir + "/" + samp + ".2.trimmed.fastq"],
Expand All @@ -552,17 +551,17 @@ def requires(self):
outsam=map_dir + "/" + samp + ".sam",
ref_file=self.ref_file,
gff_file=self.gff_file,
out_gtf=stng_dir + "/" + samp + "_" + gff_name + apd + "_sTie.gtf",
out_cover=stng_dir + "/" + samp + "_" + gff_name + apd + "_covered_sTie.gtf",
out_abun=stng_dir + "/" + samp + "_" + gff_name + apd + "_sTie.tab",
out_gtf=stng_dir + "/" + samp + apd + "_sTie.gtf",
out_cover=stng_dir + "/" + samp + apd + "_covered_sTie.gtf",
out_abun=stng_dir + "/" + samp + apd + "_sTie.tab",
in_bam_file=map_dir + "/" + samp + "_srt.bam",
workdir=self.workdir,
sample=samp,
qc_outdir=trim_dir,
map_dir=map_dir)
elif self.kingdom == 'both':
prok_gff = os.path.basename(self.gff_file.split(",")[0]).split(".gff")[0]
euk_gff = os.path.basename(self.gff_file.split(",")[1]).split(".gff")[0]
prok_gff = self.gff_file.split(",")[0]
euk_gff = self.gff_file.split(",")[1]
yield StringTieScores(fastqs=[trim_dir + "/" + samp +
".1.trimmed.fastq",
trim_dir + "/" + samp +
Expand All @@ -572,12 +571,12 @@ def requires(self):
spliceFile=splice_file,
outsam=map_dir + "/" + samp + ".sam",
ref_file=self.ref_file,
out_gtf=stng_dir + "/" + samp + "_" + prok_gff + "_prok" + "_sTie.gtf",
out_cover=stng_dir + "/" + samp + "_" + prok_gff + "_prok" + "_covered_sTie.gtf",
out_abun=stng_dir + "/" + samp + "_" + prok_gff + "_prok" + "_sTie.tab",
out_gtf=stng_dir + "/" + samp + "_prok" + "_sTie.gtf",
out_cover=stng_dir + "/" + samp + "_prok" + "_covered_sTie.gtf",
out_abun=stng_dir + "/" + samp + "_prok" + "_sTie.tab",
in_bam_file=map_dir + "/" + samp + "_srt_prok.bam",
workdir=self.workdir,
gff_file=self.gff_file.split(",")[0],
gff_file=prok_gff,
sample=samp,
qc_outdir=trim_dir,
map_dir=map_dir)
Expand All @@ -588,12 +587,12 @@ def requires(self):
spliceFile=splice_file,
outsam=map_dir + "/" + samp + ".sam",
ref_file=self.ref_file,
out_gtf=stng_dir + "/" + samp + "_" + euk_gff + "_euk" + "_sTie.gtf",
out_cover=stng_dir + "/" + samp + "_" + euk_gff + "_euk" + "_covered_sTie.gtf",
out_abun=stng_dir + "/" + samp + "_" + euk_gff + "_euk" + "_sTie.tab",
out_gtf=os.path.join(stng_dir, samp + "_euk_sTie.gtf"),
out_cover=os.path.join(stng_dir, samp + "_euk_covered_sTie.gtf"),
out_abun=os.path.join(stng_dir, samp + "_euk_sTie.tab"),
in_bam_file=map_dir + "/" + samp + "_srt_prok.bam",
workdir=self.workdir,
gff_file=self.gff_file.split(",")[1],
gff_file=euk_gff,
sample=samp,
qc_outdir=trim_dir,
map_dir=map_dir)
Expand Down
Loading

0 comments on commit abe75bc

Please sign in to comment.