-
Notifications
You must be signed in to change notification settings - Fork 2
/
convert_sj_to_psi.smk
111 lines (95 loc) · 3.34 KB
/
convert_sj_to_psi.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
#### GTF annotation file; handed to convert_sj_to_psi.R through --gtf
gtf = "/SAN/vyplab/vyplab_reference_genomes/annotation/human/GRCh38/gencode.v34.annotation.gtf"
#### Folders and all the other run settings
#### humans (presumably the human sample set) -- folder paths should END in a
#### forward slash ("/"): the code below appends file names by plain string concatenation
out_spot = "normalized_annotated/"
input_sj_folder = "/SAN/vyplab/alb_projects/data/sinai_splice_junctions/sinai_all_samples_renamed_sj_tabs/"
sj_suffix = ".SJ.out.tab"
#### cell lines -- alternative input folder; swap it in for input_sj_folder above
# input_sj_folder = "/SAN/vyplab/alb_projects/data/sinai_splice_junctions/all_bams_kds_linked/sj_files_only/"
# -------- DON'T TOUCH ANYTHING PAST THIS POINT ----------------------------
def get_single_psi_parsed_files_dasper(SAMPLES):
    """
    Build the per-sample CSV paths expected inside ``output_dir``.

    For every entry of SAMPLES, in order, the result holds the path
    ``<output_dir>/<sample>_normalized_annotated.csv``. The paths are only
    computed; nothing is checked or created on disk.
    """
    csv_paths = []
    for sample_id in SAMPLES:
        csv_name = sample_id + "_normalized_annotated.csv"
        csv_paths.append(os.path.join(output_dir, csv_name))
    return csv_paths
# Make sure both folders end in a path separator. Every rule below builds file
# names by plain string concatenation (folder + name), so a missing trailing
# "/" in the settings above would otherwise silently produce wrong paths.
# os.path.join(path, "") leaves a path that already ends in "/" unchanged.
input_sj_folder = os.path.join(input_sj_folder, "")
output_dir = os.path.join(input_sj_folder, out_spot, "")
# One sample per file named <sample><sj_suffix> found under the input folder.
SAMPLES, = glob_wildcards(input_sj_folder + "{sample}" + sj_suffix)
# Aggregate target: requests the annotated CSV and the BED file of every
# discovered sample, plus the marker file produced by dummy_agg_to_bed.
rule all_normalize_annotate:
    input:
        expand(output_dir + "{sample}_normalized_annotated.csv", sample=SAMPLES),
        expand(output_dir + "beds/{sample}_normalized_annotated.bed", sample=SAMPLES),
        output_dir + "beds/beds_dones"
        # Disabled combined-samples target:
        # os.path.join(output_dir, "normalized_annotated_combined_samples.csv")
# Run convert_sj_to_psi.R on one sample's splice-junction file.
# The rule expects the script to leave <sample>_normalized_annotated.csv in the
# output folder (assumption: the script itself is not part of this file).
rule normalize_annotate:
    input:
        input_sj_folder + "{sample}" + sj_suffix
    output:
        output_dir + "{sample}" + "_normalized_annotated.csv"
    params:
        gtf = gtf,
        sample_name = "{sample}",
        output_folder = output_dir,
        # forwarded to the script as -m; its meaning is defined by the R script
        mincount = 1
    shell:
        # The output folder is taken from params (not from the Snakefile
        # global) so the command only depends on what the rule declares, and
        # every path / name is shell-quoted with ":q" so values containing
        # spaces or shell metacharacters are passed through intact.
        """
        mkdir -p {params.output_folder:q}
        Rscript convert_sj_to_psi.R \
            --sample_name {params.sample_name:q} \
            --sample_file {input:q} \
            --gtf {params.gtf:q} \
            --output_folder {params.output_folder:q} -m {params.mincount}
        """
# Convert one sample's annotated CSV into a BED file under <output_dir>/beds/
# by calling splice_junction_psi_tobed.py.
rule to_bed:
    input:
        output_dir + "{sample}" + "_normalized_annotated.csv"
    output:
        output_dir + "beds/" + "{sample}" + "_normalized_annotated.bed"
    # Same group as dummy_agg_to_bed, so both rules share one job group.
    group: "to_bed"
    params:
        bed_dir = output_dir + "beds/"
    shell:
        # ":q" shell-quotes each path so folders or sample names containing
        # spaces or shell metacharacters do not split the command line.
        """
        mkdir -p {params.bed_dir:q}
        python3 splice_junction_psi_tobed.py -i {input:q} -o {output:q}
        """
# Marker rule: once the BED file of every sample exists, touch
# <output_dir>/beds/beds_dones. It carries the same group label as to_bed.
rule dummy_agg_to_bed:
    group: "to_bed"
    input:
        expand(output_dir + "beds/{sample}_normalized_annotated.bed", sample=SAMPLES)
    output:
        output_dir + "beds/beds_dones"
    shell:
        """
        touch {output}
        """
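# NOTE: the two commented-out rules below are disabled alternatives that share
# one rule name and differ only in their output file (.csv vs. .RDS);
# re-enable at most one of them.
# rule squashed_normalize_annotate: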
# input:
# all_parsed_csvs = get_single_psi_parsed_files_dasper(SAMPLES)
# output:
# os.path.join(output_dir, "normalized_annotated_combined_samples.csv")
# params:
# dir_of_normed = output_dir
# shell:
# """
# Rscript scripts/combine_annotated_psi.R \
# --folder {params.dir_of_normed} \
# --out {output}
# """
# rule squashed_normalize_annotate:
# input:
# all_parsed_csvs = get_single_psi_parsed_files_dasper(SAMPLES)
# output:
# os.path.join(output_dir, "normalized_annotated_combined_samples.RDS")
# params:
# dir_of_normed = output_dir
# shell:
# """
# Rscript scripts/combine_annotated_psi.R \
# --folder {params.dir_of_normed} \
# --out {output}
# """